Merge branch 'main' of https://github.com/lancedb/lancedb into yang/relative-lance-dep

feat: support FTS options on RemoteTable
Signed-off-by: BubbleCal <bubble-cal@outlook.com>
2025-12-25 06:19:57 +00:00 · 2024-11-11 17:36:06 +08:00 · 2024-11-08 18:49:13 +08:00 · 2024-11-08 18:49:09 +08:00 · 2024-11-01 15:17:25 +08:00 · 2024-11-01 15:15:23 +08:00
64 changed files with 2219 additions and 7923 deletions
--- a/.github/workflows/docs.yml
+++ b/.github/workflows/docs.yml
@@ -31,7 +31,7 @@ jobs:
      - name: Install dependecies needed for ubuntu
        run: |
          sudo apt install -y protobuf-compiler libssl-dev
-          rustup update && rustup default
+          rustup update && rustup default
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
@@ -41,8 +41,8 @@ jobs:
      - name: Build Python
        working-directory: python
        run: |
-          python -m pip install --extra-index-url https://pypi.fury.io/lancedb/ -e .
-          python -m pip install --extra-index-url https://pypi.fury.io/lancedb/ -r ../docs/requirements.txt
+          python -m pip install -e .
+          python -m pip install -r ../docs/requirements.txt
      - name: Set up node
        uses: actions/setup-node@v3
        with:
--- a/.github/workflows/docs_test.yml
+++ b/.github/workflows/docs_test.yml
@@ -49,7 +49,7 @@ jobs:
    - name: Build Python
      working-directory: docs/test
      run:
-        python -m pip install --extra-index-url https://pypi.fury.io/lancedb/ -r requirements.txt
+        python -m pip install -r requirements.txt
    - name: Create test files
      run: |
        cd docs/test
--- a/.github/workflows/nodejs.yml
+++ b/.github/workflows/nodejs.yml
@@ -53,9 +53,6 @@ jobs:
        cargo clippy --all --all-features -- -D warnings
        npm ci
        npm run lint-ci
-    - name: Lint examples
-      working-directory: nodejs/examples
-      run: npm ci && npm run lint-ci
  linux:
    name: Linux (NodeJS ${{ matrix.node-version }})
    timeout-minutes: 30
@@ -94,19 +91,6 @@ jobs:
      env:
        S3_TEST: "1"
      run: npm run test
-    - name: Setup examples
-      working-directory: nodejs/examples
-      run: npm ci
-    - name: Test examples
-      working-directory: ./
-      env:
-        OPENAI_API_KEY: test
-        OPENAI_BASE_URL: http://0.0.0.0:8000
-      run: |
-        python ci/mock_openai.py &
-        ss -ltnp | grep :8000
-        cd nodejs/examples
-        npm test
  macos:
    timeout-minutes: 30
    runs-on: "macos-14"
--- a/.github/workflows/npm-publish.yml
+++ b/.github/workflows/npm-publish.yml
@@ -232,7 +232,21 @@ jobs:
    if: startsWith(github.ref, 'refs/tags/v')
    steps:
      - uses: actions/checkout@v4
+      - name: Cache installations
+        id: cache-installs
+        uses: actions/cache@v4
+        with:
+          path: |
+            C:\Program Files\Git
+            C:\BuildTools
+            C:\Program Files (x86)\Windows Kits
+            C:\Program Files\7-Zip
+            C:\protoc
+          key: ${{ runner.os }}-arm64-installs-v1
+          restore-keys: |
+            ${{ runner.os }}-arm64-installs-
      - name: Install Git
+        if: steps.cache-installs.outputs.cache-hit != 'true'
        run: |
          Invoke-WebRequest -Uri "https://github.com/git-for-windows/git/releases/download/v2.44.0.windows.1/Git-2.44.0-64-bit.exe" -OutFile "git-installer.exe"
          Start-Process -FilePath "git-installer.exe" -ArgumentList "/VERYSILENT", "/NORESTART" -Wait
@@ -249,6 +263,7 @@ jobs:
        with:
          python-version: "3.13"
      - name: Install Visual Studio Build Tools
+        if: steps.cache-installs.outputs.cache-hit != 'true'
        run: |
          Invoke-WebRequest -Uri "https://aka.ms/vs/17/release/vs_buildtools.exe" -OutFile "vs_buildtools.exe"
          Start-Process -FilePath "vs_buildtools.exe" -ArgumentList "--quiet", "--wait", "--norestart", "--nocache", `
@@ -297,6 +312,7 @@ jobs:
        with:
          workspaces: rust
      - name: Install 7-Zip ARM
+        if: steps.cache-installs.outputs.cache-hit != 'true'
        run: |
          New-Item -Path 'C:\7zip' -ItemType Directory
          Invoke-WebRequest https://7-zip.org/a/7z2408-arm64.exe -OutFile C:\7zip\7z-installer.exe
@@ -306,6 +322,7 @@ jobs:
        run: Add-Content $env:GITHUB_PATH "C:\Program Files\7-Zip"
        shell: powershell
      - name: Install Protoc v21.12
+        if: steps.cache-installs.outputs.cache-hit != 'true'
        working-directory: C:\
        run: |
          if (Test-Path 'C:\protoc') {
@@ -369,7 +386,21 @@ jobs:
    if: startsWith(github.ref, 'refs/tags/v')
    steps:
      - uses: actions/checkout@v4
+      - name: Cache installations
+        id: cache-installs
+        uses: actions/cache@v4
+        with:
+          path: |
+            C:\Program Files\Git
+            C:\BuildTools
+            C:\Program Files (x86)\Windows Kits
+            C:\Program Files\7-Zip
+            C:\protoc
+          key: ${{ runner.os }}-arm64-installs-v1
+          restore-keys: |
+            ${{ runner.os }}-arm64-installs-
      - name: Install Git
+        if: steps.cache-installs.outputs.cache-hit != 'true'
        run: |
          Invoke-WebRequest -Uri "https://github.com/git-for-windows/git/releases/download/v2.44.0.windows.1/Git-2.44.0-64-bit.exe" -OutFile "git-installer.exe"
          Start-Process -FilePath "git-installer.exe" -ArgumentList "/VERYSILENT", "/NORESTART" -Wait
@@ -386,6 +417,7 @@ jobs:
        with:
          python-version: "3.13"
      - name: Install Visual Studio Build Tools
+        if: steps.cache-installs.outputs.cache-hit != 'true'
        run: |
          Invoke-WebRequest -Uri "https://aka.ms/vs/17/release/vs_buildtools.exe" -OutFile "vs_buildtools.exe"
          Start-Process -FilePath "vs_buildtools.exe" -ArgumentList "--quiet", "--wait", "--norestart", "--nocache", `
@@ -424,6 +456,7 @@ jobs:
        with:
          workspaces: rust
      - name: Install 7-Zip ARM
+        if: steps.cache-installs.outputs.cache-hit != 'true'
        run: |
          New-Item -Path 'C:\7zip' -ItemType Directory
          Invoke-WebRequest https://7-zip.org/a/7z2408-arm64.exe -OutFile C:\7zip\7z-installer.exe
@@ -433,6 +466,7 @@ jobs:
        run: Add-Content $env:GITHUB_PATH "C:\Program Files\7-Zip"
        shell: powershell
      - name: Install Protoc v21.12
+        if: steps.cache-installs.outputs.cache-hit != 'true'
        working-directory: C:\
        run: |
          if (Test-Path 'C:\protoc') {
--- a/.github/workflows/python.yml
+++ b/.github/workflows/python.yml
@@ -138,7 +138,7 @@ jobs:
        run: rm -rf target/wheels
  windows:
    name: "Windows: ${{ matrix.config.name }}"
-    timeout-minutes: 60
+    timeout-minutes: 30
    strategy:
      matrix:
        config:
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -50,7 +50,6 @@ jobs:
        run: cargo fmt --all -- --check
      - name: Run clippy
        run: cargo clippy --workspace --tests --all-features -- -D warnings
-
  linux:
    timeout-minutes: 30
    # To build all features, we need more disk space than is available
@@ -92,7 +91,6 @@ jobs:
        run: cargo test --all-features
      - name: Run examples
        run: cargo run --example simple
-
  macos:
    timeout-minutes: 30
    strategy:
@@ -120,7 +118,6 @@ jobs:
      - name: Run tests
        # Run with everything except the integration tests.
        run: cargo test --features remote,fp16kernels
-
  windows:
    runs-on: windows-2022
    steps:
@@ -142,11 +139,24 @@ jobs:
          $env:VCPKG_ROOT = $env:VCPKG_INSTALLATION_ROOT
          cargo build
          cargo test
-
  windows-arm64:
    runs-on: windows-4x-arm
    steps:
+      - name: Cache installations
+        id: cache-installs
+        uses: actions/cache@v4
+        with:
+          path: |
+            C:\Program Files\Git
+            C:\BuildTools
+            C:\Program Files (x86)\Windows Kits
+            C:\Program Files\7-Zip
+            C:\protoc
+          key: ${{ runner.os }}-arm64-installs-v1
+          restore-keys: |
+            ${{ runner.os }}-arm64-installs-
      - name: Install Git
+        if: steps.cache-installs.outputs.cache-hit != 'true'
        run: |
          Invoke-WebRequest -Uri "https://github.com/git-for-windows/git/releases/download/v2.44.0.windows.1/Git-2.44.0-64-bit.exe" -OutFile "git-installer.exe"
          Start-Process -FilePath "git-installer.exe" -ArgumentList "/VERYSILENT", "/NORESTART" -Wait
@@ -163,6 +173,7 @@ jobs:
        with:
          python-version: "3.13"
      - name: Install Visual Studio Build Tools
+        if: steps.cache-installs.outputs.cache-hit != 'true'
        run: |
          Invoke-WebRequest -Uri "https://aka.ms/vs/17/release/vs_buildtools.exe" -OutFile "vs_buildtools.exe"
          Start-Process -FilePath "vs_buildtools.exe" -ArgumentList "--quiet", "--wait", "--norestart", "--nocache", `
@@ -206,10 +217,12 @@ jobs:
        run: |
          Add-Content $env:GITHUB_PATH "$env:USERPROFILE\.cargo\bin"
        shell: powershell
+
      - uses: Swatinem/rust-cache@v2
        with:
          workspaces: rust
      - name: Install 7-Zip ARM
+        if: steps.cache-installs.outputs.cache-hit != 'true'
        run: |
          New-Item -Path 'C:\7zip' -ItemType Directory
          Invoke-WebRequest https://7-zip.org/a/7z2408-arm64.exe -OutFile C:\7zip\7z-installer.exe
@@ -219,11 +232,12 @@ jobs:
        run: Add-Content $env:GITHUB_PATH "C:\Program Files\7-Zip"
        shell: powershell
      - name: Install Protoc v21.12
+        if: steps.cache-installs.outputs.cache-hit != 'true'
        working-directory: C:\
        run: |
          if (Test-Path 'C:\protoc') {
-            Write-Host "Protoc directory exists, skipping installation"
-            return
+              Write-Host "Protoc directory exists, skipping installation"
+              return
          }
          New-Item -Path 'C:\protoc' -ItemType Directory
          Set-Location C:\protoc
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -21,15 +21,13 @@ categories = ["database-implementations"]
 rust-version = "1.80.0" # TODO: lower this once we upgrade Lance again.

 [workspace.dependencies]
-lance = { "version" = "=0.19.2", "features" = [
-    "dynamodb",
-], git = "https://github.com/lancedb/lance.git", tag = "v0.19.2" }
-lance-index = { "version" = "=0.19.2", git = "https://github.com/lancedb/lance.git", tag = "v0.19.2" }
-lance-linalg = { "version" = "=0.19.2", git = "https://github.com/lancedb/lance.git", tag = "v0.19.2" }
-lance-table = { "version" = "=0.19.2", git = "https://github.com/lancedb/lance.git", tag = "v0.19.2" }
-lance-testing = { "version" = "=0.19.2", git = "https://github.com/lancedb/lance.git", tag = "v0.19.2" }
-lance-datafusion = { "version" = "=0.19.2", git = "https://github.com/lancedb/lance.git", tag = "v0.19.2" }
-lance-encoding = { "version" = "=0.19.2", git = "https://github.com/lancedb/lance.git", tag = "v0.19.2" }
+lance = { "version" = "=0.19.2", "features" = ["dynamodb"], path = "../lance/rust/lance"}
+lance-index = { "version" = "=0.19.2", path = "../lance/rust/lance-index"}
+lance-linalg = { "version" = "=0.19.2", path = "../lance/rust/lance-linalg"}
+lance-testing = { "version" = "=0.19.2", path = "../lance/rust/lance-testing"}
+lance-datafusion = { "version" = "=0.19.2", path = "../lance/rust/lance-datafusion"}
+lance-encoding = { "version" = "=0.19.2", path = "../lance/rust/lance-encoding"}
+lance-table = { "version" = "=0.19.2", path = "../lance/rust/lance-table"}
 # Note that this one does not include pyarrow
 arrow = { version = "52.2", optional = false }
 arrow-array = "52.2"
--- a/ci/mock_openai.py
+++ b/ci/mock_openai.py
@@ -1,57 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright The LanceDB Authors
-"""A zero-dependency mock OpenAI embeddings API endpoint for testing purposes."""
-import argparse
-import json
-import http.server
-
-
-class MockOpenAIRequestHandler(http.server.BaseHTTPRequestHandler):
-    def do_POST(self):
-        content_length = int(self.headers["Content-Length"])
-        post_data = self.rfile.read(content_length)
-        post_data = json.loads(post_data.decode("utf-8"))
-        # See: https://platform.openai.com/docs/api-reference/embeddings/create
-
-        if isinstance(post_data["input"], str):
-            num_inputs = 1
-        else:
-            num_inputs = len(post_data["input"])
-
-        model = post_data.get("model", "text-embedding-ada-002")
-
-        data = []
-        for i in range(num_inputs):
-            data.append({
-                "object": "embedding",
-                "embedding": [0.1] * 1536,
-                "index": i,
-            })
-
-        response = {
-            "object": "list",
-            "data": data,
-            "model": model,
-            "usage": {
-                "prompt_tokens": 0,
-                "total_tokens": 0,
-            }
-        }
-
-        self.send_response(200)
-        self.send_header("Content-type", "application/json")
-        self.end_headers()
-        self.wfile.write(json.dumps(response).encode("utf-8"))
-
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description="Mock OpenAI embeddings API endpoint")
-    parser.add_argument("--port", type=int, default=8000, help="Port to listen on")
-    args = parser.parse_args()
-    port = args.port
-
-    print(f"server started on port {port}. Press Ctrl-C to stop.")
-    print(f"To use, set OPENAI_BASE_URL=http://localhost:{port} in your environment.")
-
-    with http.server.HTTPServer(("0.0.0.0", port), MockOpenAIRequestHandler) as server:
-        server.serve_forever()
--- a/docs/src/ann_indexes.md
+++ b/docs/src/ann_indexes.md
@@ -45,9 +45,9 @@ Lance supports `IVF_PQ` index type by default.
        Creating indexes is done via the [lancedb.Table.createIndex](../js/classes/Table.md/#createIndex) method.

        ```typescript
-        --8<--- "nodejs/examples/ann_indexes.test.ts:import"
+        --8<--- "nodejs/examples/ann_indexes.ts:import"

-        --8<-- "nodejs/examples/ann_indexes.test.ts:ingest"
+        --8<-- "nodejs/examples/ann_indexes.ts:ingest"
        ```

    === "vectordb (deprecated)"
@@ -140,15 +140,13 @@ There are a couple of parameters that can be used to fine-tune the search:

 - **limit** (default: 10): The amount of results that will be returned
 - **nprobes** (default: 20): The number of probes used. A higher number makes search more accurate but also slower.<br/>
-  Most of the time, setting nprobes to cover 5-15% of the dataset should achieve high recall with low latency.<br/>
-    - _For example_, For a dataset of 1 million vectors divided into 256 partitions, `nprobes` should be set to ~20-40. This value can be adjusted to achieve the optimal balance between search latency and search quality. <br/>
-  
+  Most of the time, setting nprobes to cover 5-15% of the dataset should achieve high recall with low latency.<br/>
+  e.g., for 1M vectors divided up into 256 partitions, nprobes should be set to ~20-40.<br/>
+  Note: nprobes is only applicable if an ANN index is present. If specified on a table without an ANN index, it is ignored.
 - **refine_factor** (default: None): Refine the results by reading extra elements and re-ranking them in memory.<br/>
  A higher number makes search more accurate but also slower. If you find the recall is less than ideal, try refine_factor=10 to start.<br/>
-    - _For example_, For a dataset of 1 million vectors divided into 256 partitions, setting the `refine_factor` to 200 will initially retrieve the top 4,000 candidates (top k * refine_factor) from all searched partitions. These candidates are then reranked to determine the final top 20 results.<br/>
-!!! note 
-    Both `nprobes` and `refine_factor` are only applicable if an ANN index is present. If specified on a table without an ANN index, those parameters are ignored.
-
+  e.g., for 1M vectors divided into 256 partitions, if you're looking for top 20, then refine_factor=200 reranks the whole partition.<br/>
+  Note: refine_factor is only applicable if an ANN index is present. If specified on a table without an ANN index, it is ignored.

 === "Python"

@@ -171,7 +169,7 @@ There are a couple of parameters that can be used to fine-tune the search:
    === "@lancedb/lancedb"

        ```typescript
-        --8<-- "nodejs/examples/ann_indexes.test.ts:search1"
+        --8<-- "nodejs/examples/ann_indexes.ts:search1"
        ```

    === "vectordb (deprecated)"
@@ -205,7 +203,7 @@ You can further filter the elements returned by a search using a where clause.
    === "@lancedb/lancedb"

        ```typescript
-        --8<-- "nodejs/examples/ann_indexes.test.ts:search2"
+        --8<-- "nodejs/examples/ann_indexes.ts:search2"
        ```

    === "vectordb (deprecated)"
@@ -237,7 +235,7 @@ You can select the columns returned by the query using a select clause.
    === "@lancedb/lancedb"

        ```typescript
-        --8<-- "nodejs/examples/ann_indexes.test.ts:search3"
+        --8<-- "nodejs/examples/ann_indexes.ts:search3"
        ```

    === "vectordb (deprecated)"
--- a/docs/src/basic.md
+++ b/docs/src/basic.md
@@ -157,7 +157,7 @@ recommend switching to stable releases.
        import * as lancedb from "@lancedb/lancedb";
        import * as arrow from "apache-arrow";

-        --8<-- "nodejs/examples/basic.test.ts:connect"
+        --8<-- "nodejs/examples/basic.ts:connect"
        ```

    === "vectordb (deprecated)"
@@ -212,7 +212,7 @@ table.
    === "@lancedb/lancedb"

        ```typescript
-        --8<-- "nodejs/examples/basic.test.ts:create_table"
+        --8<-- "nodejs/examples/basic.ts:create_table"
        ```

    === "vectordb (deprecated)"
@@ -268,7 +268,7 @@ similar to a `CREATE TABLE` statement in SQL.
    === "@lancedb/lancedb"

        ```typescript
-        --8<-- "nodejs/examples/basic.test.ts:create_empty_table"
+        --8<-- "nodejs/examples/basic.ts:create_empty_table"
        ```

    === "vectordb (deprecated)"
@@ -298,7 +298,7 @@ Once created, you can open a table as follows:
    === "@lancedb/lancedb"

        ```typescript
-        --8<-- "nodejs/examples/basic.test.ts:open_table"
+        --8<-- "nodejs/examples/basic.ts:open_table"
        ```

    === "vectordb (deprecated)"
@@ -327,7 +327,7 @@ If you forget the name of your table, you can always get a listing of all table
    === "@lancedb/lancedb"

        ```typescript
-        --8<-- "nodejs/examples/basic.test.ts:table_names"
+        --8<-- "nodejs/examples/basic.ts:table_names"
        ```

    === "vectordb (deprecated)"
@@ -357,7 +357,7 @@ After a table has been created, you can always add more data to it as follows:
    === "@lancedb/lancedb"

        ```typescript
-        --8<-- "nodejs/examples/basic.test.ts:add_data"
+        --8<-- "nodejs/examples/basic.ts:add_data"
        ```

    === "vectordb (deprecated)"
@@ -389,7 +389,7 @@ Once you've embedded the query, you can find its nearest neighbors as follows:
    === "@lancedb/lancedb"

        ```typescript
-        --8<-- "nodejs/examples/basic.test.ts:vector_search"
+        --8<-- "nodejs/examples/basic.ts:vector_search"
        ```

    === "vectordb (deprecated)"
@@ -429,7 +429,7 @@ LanceDB allows you to create an ANN index on a table as follows:
    === "@lancedb/lancedb"

        ```typescript
-        --8<-- "nodejs/examples/basic.test.ts:create_index"
+        --8<-- "nodejs/examples/basic.ts:create_index"
        ```

    === "vectordb (deprecated)"
@@ -469,7 +469,7 @@ This can delete any number of rows that match the filter.
    === "@lancedb/lancedb"

        ```typescript
-        --8<-- "nodejs/examples/basic.test.ts:delete_rows"
+        --8<-- "nodejs/examples/basic.ts:delete_rows"
        ```

    === "vectordb (deprecated)"
@@ -527,7 +527,7 @@ Use the `drop_table()` method on the database to remove a table.
    === "@lancedb/lancedb"

        ```typescript
-        --8<-- "nodejs/examples/basic.test.ts:drop_table"
+        --8<-- "nodejs/examples/basic.ts:drop_table"
        ```

    === "vectordb (deprecated)"
@@ -561,8 +561,8 @@ You can use the embedding API when working with embedding models. It automatical
    === "@lancedb/lancedb"

        ```typescript
-        --8<-- "nodejs/examples/embedding.test.ts:imports"
-        --8<-- "nodejs/examples/embedding.test.ts:openai_embeddings"
+        --8<-- "nodejs/examples/embedding.ts:imports"
+        --8<-- "nodejs/examples/embedding.ts:openai_embeddings"
        ```

 === "Rust"
--- a/docs/src/embeddings/custom_embedding_function.md
+++ b/docs/src/embeddings/custom_embedding_function.md
@@ -47,9 +47,9 @@ Let's implement `SentenceTransformerEmbeddings` class. All you need to do is imp
 === "TypeScript"

    ```ts
-    --8<--- "nodejs/examples/custom_embedding_function.test.ts:imports"
+    --8<--- "nodejs/examples/custom_embedding_function.ts:imports"

-    --8<--- "nodejs/examples/custom_embedding_function.test.ts:embedding_impl"
+    --8<--- "nodejs/examples/custom_embedding_function.ts:embedding_impl"
    ```


@@ -78,7 +78,7 @@ Now you can use this embedding function to create your table schema and that's i
 === "TypeScript"

    ```ts
-    --8<--- "nodejs/examples/custom_embedding_function.test.ts:call_custom_function"
+    --8<--- "nodejs/examples/custom_embedding_function.ts:call_custom_function"
    ```

 !!! note
--- a/docs/src/embeddings/embedding_functions.md
+++ b/docs/src/embeddings/embedding_functions.md
@@ -94,8 +94,8 @@ the embeddings at all:
    === "@lancedb/lancedb"

        ```ts
-        --8<-- "nodejs/examples/embedding.test.ts:imports"
-        --8<-- "nodejs/examples/embedding.test.ts:embedding_function"
+        --8<-- "nodejs/examples/embedding.ts:imports"
+        --8<-- "nodejs/examples/embedding.ts:embedding_function"
        ```

    === "vectordb (deprecated)"
@@ -150,7 +150,7 @@ need to worry about it when you query the table:
            .toArray()
        ```

-    === "vectordb (deprecated)"
+    === "vectordb (deprecated)"

        ```ts
        const results = await table
--- a/docs/src/embeddings/index.md
+++ b/docs/src/embeddings/index.md
@@ -51,8 +51,8 @@ LanceDB registers the OpenAI embeddings function in the registry as `openai`. Yo
 === "TypeScript"

    ```typescript
-    --8<--- "nodejs/examples/embedding.test.ts:imports"
-    --8<--- "nodejs/examples/embedding.test.ts:openai_embeddings"
+    --8<--- "nodejs/examples/embedding.ts:imports"
+    --8<--- "nodejs/examples/embedding.ts:openai_embeddings"
    ```

 === "Rust"
@@ -121,10 +121,12 @@ class Words(LanceModel):
    vector: Vector(func.ndims()) = func.VectorField()

 table = db.create_table("words", schema=Words)
-table.add([
-    {"text": "hello world"},
-    {"text": "goodbye world"}
-])
+table.add(
+    [
+        {"text": "hello world"},
+        {"text": "goodbye world"}
+    ]
+)

 query = "greetings"
 actual = table.search(query).limit(1).to_pydantic(Words)[0]
--- a/docs/src/guides/tables.md
+++ b/docs/src/guides/tables.md
@@ -85,13 +85,13 @@ Initialize a LanceDB connection and create a table


        ```ts
-        --8<-- "nodejs/examples/basic.test.ts:create_table"
+        --8<-- "nodejs/examples/basic.ts:create_table"
        ```

        This will infer the schema from the provided data. If you want to explicitly provide a schema, you can use `apache-arrow` to declare a schema

        ```ts
-        --8<-- "nodejs/examples/basic.test.ts:create_table_with_schema"
+        --8<-- "nodejs/examples/basic.ts:create_table_with_schema"
        ```

        !!! info "Note"
@@ -100,14 +100,14 @@ Initialize a LanceDB connection and create a table
            passed in will NOT be appended to the table in that case.

        ```ts
-        --8<-- "nodejs/examples/basic.test.ts:create_table_exists_ok"
+        --8<-- "nodejs/examples/basic.ts:create_table_exists_ok"
        ```

        Sometimes you want to make sure that you start fresh. If you want to
        overwrite the table, you can pass in mode: "overwrite" to the createTable function.

        ```ts
-        --8<-- "nodejs/examples/basic.test.ts:create_table_overwrite"
+        --8<-- "nodejs/examples/basic.ts:create_table_overwrite"
        ```

    === "vectordb (deprecated)"
@@ -227,7 +227,7 @@ LanceDB supports float16 data type!
    === "@lancedb/lancedb"

        ```typescript
-        --8<-- "nodejs/examples/basic.test.ts:create_f16_table"
+        --8<-- "nodejs/examples/basic.ts:create_f16_table"
        ```

    === "vectordb (deprecated)"
@@ -455,7 +455,7 @@ You can create an empty table for scenarios where you want to add data to the ta
    === "@lancedb/lancedb"

        ```typescript
-        --8<-- "nodejs/examples/basic.test.ts:create_empty_table"
+        --8<-- "nodejs/examples/basic.ts:create_empty_table"
        ```

    === "vectordb (deprecated)"
--- a/docs/src/notebooks/hybrid_search.ipynb
+++ b/docs/src/notebooks/hybrid_search.ipynb
--- a/docs/src/search.md
+++ b/docs/src/search.md
@@ -58,9 +58,9 @@ db.create_table("my_vectors", data=data)
    === "@lancedb/lancedb"

        ```ts
-        --8<-- "nodejs/examples/search.test.ts:import"
+        --8<-- "nodejs/examples/search.ts:import"

-        --8<-- "nodejs/examples/search.test.ts:search1"
+        --8<-- "nodejs/examples/search.ts:search1"
        ```


@@ -89,7 +89,7 @@ By default, `l2` will be used as metric type. You can specify the metric type as
    === "@lancedb/lancedb"

        ```ts
-        --8<-- "nodejs/examples/search.test.ts:search2"
+        --8<-- "nodejs/examples/search.ts:search2"
        ```

    === "vectordb (deprecated)"
--- a/docs/src/sql.md
+++ b/docs/src/sql.md
@@ -49,7 +49,7 @@ const tbl = await db.createTable('myVectors', data)
    === "@lancedb/lancedb"

        ```ts
-        --8<-- "nodejs/examples/filtering.test.ts:search"
+        --8<-- "nodejs/examples/filtering.ts:search"
        ```

    === "vectordb (deprecated)"
@@ -91,7 +91,7 @@ For example, the following filter string is acceptable:
    === "@lancedb/lancedb"

        ```ts
-        --8<-- "nodejs/examples/filtering.test.ts:vec_search"
+        --8<-- "nodejs/examples/filtering.ts:vec_search"
        ```

    === "vectordb (deprecated)"
@@ -169,7 +169,7 @@ You can also filter your data without search.
    === "@lancedb/lancedb"

        ```ts
-        --8<-- "nodejs/examples/filtering.test.ts:sql_search"
+        --8<-- "nodejs/examples/filtering.ts:sql_search"
        ```

    === "vectordb (deprecated)"
--- a/nodejs/test/table.test.ts
+++ b/nodejs/test/table.test.ts
@@ -998,18 +998,4 @@ describe("column name options", () => {
    const results = await table.query().where("`camelCase` = 1").toArray();
    expect(results[0].camelCase).toBe(1);
  });
-
-  test("can make multiple vector queries in one go", async () => {
-    const results = await table
-      .query()
-      .nearestTo([0.1, 0.2])
-      .addQueryVector([0.1, 0.2])
-      .limit(1)
-      .toArray();
-    console.log(results);
-    expect(results.length).toBe(2);
-    results.sort((a, b) => a.query_index - b.query_index);
-    expect(results[0].query_index).toBe(0);
-    expect(results[1].query_index).toBe(1);
-  });
 });
--- a/nodejs/biome.json
+++ b/nodejs/biome.json
@@ -9,8 +9,7 @@
      "**/native.js",
      "**/native.d.ts",
      "**/npm/**/*",
-      "**/.vscode/**",
-      "./examples/*"
+      "**/.vscode/**"
    ]
  },
  "formatter": {
--- a/nodejs/examples/ann_indexes.test.ts
+++ b/nodejs/examples/ann_indexes.test.ts
@@ -1,57 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// SPDX-FileCopyrightText: Copyright The LanceDB Authors
-import { expect, test } from "@jest/globals";
-// --8<-- [start:import]
-import * as lancedb from "@lancedb/lancedb";
-import { VectorQuery } from "@lancedb/lancedb";
-// --8<-- [end:import]
-import { withTempDirectory } from "./util.ts";
-
-test("ann index examples", async () => {
-  await withTempDirectory(async (databaseDir) => {
-    // --8<-- [start:ingest]
-    const db = await lancedb.connect(databaseDir);
-
-    const data = Array.from({ length: 5_000 }, (_, i) => ({
-      vector: Array(128).fill(i),
-      id: `${i}`,
-      content: "",
-      longId: `${i}`,
-    }));
-
-    const table = await db.createTable("my_vectors", data, {
-      mode: "overwrite",
-    });
-    await table.createIndex("vector", {
-      config: lancedb.Index.ivfPq({
-        numPartitions: 10,
-        numSubVectors: 16,
-      }),
-    });
-    // --8<-- [end:ingest]
-
-    // --8<-- [start:search1]
-    const search = table.search(Array(128).fill(1.2)).limit(2) as VectorQuery;
-    const results1 = await search.nprobes(20).refineFactor(10).toArray();
-    // --8<-- [end:search1]
-    expect(results1.length).toBe(2);
-
-    // --8<-- [start:search2]
-    const results2 = await table
-      .search(Array(128).fill(1.2))
-      .where("id != '1141'")
-      .limit(2)
-      .toArray();
-    // --8<-- [end:search2]
-    expect(results2.length).toBe(2);
-
-    // --8<-- [start:search3]
-    const results3 = await table
-      .search(Array(128).fill(1.2))
-      .select(["id"])
-      .limit(2)
-      .toArray();
-    // --8<-- [end:search3]
-    expect(results3.length).toBe(2);
-  });
-}, 100_000);
--- a/nodejs/examples/ann_indexes.ts
+++ b/nodejs/examples/ann_indexes.ts
@@ -0,0 +1,49 @@
+// --8<-- [start:import]
+import * as lancedb from "@lancedb/lancedb";
+// --8<-- [end:import]
+
+// --8<-- [start:ingest]
+const db = await lancedb.connect("/tmp/lancedb/");
+
+const data = Array.from({ length: 10_000 }, (_, i) => ({
+  vector: Array(1536).fill(i),
+  id: `${i}`,
+  content: "",
+  longId: `${i}`,
+}));
+
+const table = await db.createTable("my_vectors", data, { mode: "overwrite" });
+await table.createIndex("vector", {
+  config: lancedb.Index.ivfPq({
+    numPartitions: 16,
+    numSubVectors: 48,
+  }),
+});
+// --8<-- [end:ingest]
+
+// --8<-- [start:search1]
+const _results1 = await table
+  .search(Array(1536).fill(1.2))
+  .limit(2)
+  .nprobes(20)
+  .refineFactor(10)
+  .toArray();
+// --8<-- [end:search1]
+
+// --8<-- [start:search2]
+const _results2 = await table
+  .search(Array(1536).fill(1.2))
+  .where("id != '1141'")
+  .limit(2)
+  .toArray();
+// --8<-- [end:search2]
+
+// --8<-- [start:search3]
+const _results3 = await table
+  .search(Array(1536).fill(1.2))
+  .select(["id"])
+  .limit(2)
+  .toArray();
+// --8<-- [end:search3]
+
+console.log("Ann indexes: done");
--- a/nodejs/examples/basic.test.ts
+++ b/nodejs/examples/basic.test.ts
@@ -1,175 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// SPDX-FileCopyrightText: Copyright The LanceDB Authors
-import { expect, test } from "@jest/globals";
-// --8<--  [start:imports]
-import * as lancedb from "@lancedb/lancedb";
-import * as arrow from "apache-arrow";
-import {
-  Field,
-  FixedSizeList,
-  Float16,
-  Int32,
-  Schema,
-  Utf8,
-} from "apache-arrow";
-// --8<-- [end:imports]
-import { withTempDirectory } from "./util.ts";
-
-test("basic table examples", async () => {
-  await withTempDirectory(async (databaseDir) => {
-    // --8<-- [start:connect]
-    const db = await lancedb.connect(databaseDir);
-    // --8<-- [end:connect]
-    {
-      // --8<-- [start:create_table]
-      const _tbl = await db.createTable(
-        "myTable",
-        [
-          { vector: [3.1, 4.1], item: "foo", price: 10.0 },
-          { vector: [5.9, 26.5], item: "bar", price: 20.0 },
-        ],
-        { mode: "overwrite" },
-      );
-      // --8<-- [end:create_table]
-
-      const data = [
-        { vector: [3.1, 4.1], item: "foo", price: 10.0 },
-        { vector: [5.9, 26.5], item: "bar", price: 20.0 },
-      ];
-
-      {
-        // --8<-- [start:create_table_exists_ok]
-        const tbl = await db.createTable("myTable", data, {
-          existOk: true,
-        });
-        // --8<-- [end:create_table_exists_ok]
-        expect(await tbl.countRows()).toBe(2);
-      }
-      {
-        // --8<-- [start:create_table_overwrite]
-        const tbl = await db.createTable("myTable", data, {
-          mode: "overwrite",
-        });
-        // --8<-- [end:create_table_overwrite]
-        expect(await tbl.countRows()).toBe(2);
-      }
-    }
-
-    await db.dropTable("myTable");
-
-    {
-      // --8<-- [start:create_table_with_schema]
-      const schema = new arrow.Schema([
-        new arrow.Field(
-          "vector",
-          new arrow.FixedSizeList(
-            2,
-            new arrow.Field("item", new arrow.Float32(), true),
-          ),
-        ),
-        new arrow.Field("item", new arrow.Utf8(), true),
-        new arrow.Field("price", new arrow.Float32(), true),
-      ]);
-      const data = [
-        { vector: [3.1, 4.1], item: "foo", price: 10.0 },
-        { vector: [5.9, 26.5], item: "bar", price: 20.0 },
-      ];
-      const tbl = await db.createTable("myTable", data, {
-        schema,
-      });
-      // --8<-- [end:create_table_with_schema]
-      expect(await tbl.countRows()).toBe(2);
-    }
-
-    {
-      // --8<-- [start:create_empty_table]
-
-      const schema = new arrow.Schema([
-        new arrow.Field("id", new arrow.Int32()),
-        new arrow.Field("name", new arrow.Utf8()),
-      ]);
-
-      const emptyTbl = await db.createEmptyTable("empty_table", schema);
-      // --8<-- [end:create_empty_table]
-      expect(await emptyTbl.countRows()).toBe(0);
-    }
-    {
-      // --8<-- [start:open_table]
-      const _tbl = await db.openTable("myTable");
-      // --8<-- [end:open_table]
-    }
-
-    {
-      // --8<-- [start:table_names]
-      const tableNames = await db.tableNames();
-      // --8<-- [end:table_names]
-      expect(tableNames).toEqual(["empty_table", "myTable"]);
-    }
-
-    const tbl = await db.openTable("myTable");
-    {
-      // --8<-- [start:add_data]
-      const data = [
-        { vector: [1.3, 1.4], item: "fizz", price: 100.0 },
-        { vector: [9.5, 56.2], item: "buzz", price: 200.0 },
-      ];
-      await tbl.add(data);
-      // --8<-- [end:add_data]
-    }
-    {
-      // --8<-- [start:vector_search]
-      const res = await tbl.search([100, 100]).limit(2).toArray();
-      // --8<-- [end:vector_search]
-      expect(res.length).toBe(2);
-    }
-    {
-      const data = Array.from({ length: 1000 })
-        .fill(null)
-        .map(() => ({
-          vector: [Math.random(), Math.random()],
-          item: "autogen",
-          price: Math.round(Math.random() * 100),
-        }));
-
-      await tbl.add(data);
-    }
-
-    // --8<-- [start:create_index]
-    await tbl.createIndex("vector");
-    // --8<-- [end:create_index]
-
-    // --8<-- [start:delete_rows]
-    await tbl.delete('item = "fizz"');
-    // --8<-- [end:delete_rows]
-
-    // --8<-- [start:drop_table]
-    await db.dropTable("myTable");
-    // --8<-- [end:drop_table]
-    await db.dropTable("empty_table");
-
-    {
-      // --8<-- [start:create_f16_table]
-      const db = await lancedb.connect(databaseDir);
-      const dim = 16;
-      const total = 10;
-      const f16Schema = new Schema([
-        new Field("id", new Int32()),
-        new Field(
-          "vector",
-          new FixedSizeList(dim, new Field("item", new Float16(), true)),
-          false,
-        ),
-      ]);
-      const data = lancedb.makeArrowTable(
-        Array.from(Array(total), (_, i) => ({
-          id: i,
-          vector: Array.from(Array(dim), Math.random),
-        })),
-        { schema: f16Schema },
-      );
-      const _table = await db.createTable("f16_tbl", data);
-      // --8<-- [end:create_f16_table]
-      await db.dropTable("f16_tbl");
-    }
-  });
-});
--- a/nodejs/examples/basic.ts
+++ b/nodejs/examples/basic.ts
@@ -0,0 +1,162 @@
+// --8<--  [start:imports]
+import * as lancedb from "@lancedb/lancedb";
+import * as arrow from "apache-arrow";
+import {
+  Field,
+  FixedSizeList,
+  Float16,
+  Int32,
+  Schema,
+  Utf8,
+} from "apache-arrow";
+
+// --8<-- [end:imports]
+
+// --8<-- [start:connect]
+const uri = "/tmp/lancedb/";
+const db = await lancedb.connect(uri);
+// --8<-- [end:connect]
+{
+  // --8<-- [start:create_table]
+  const tbl = await db.createTable(
+    "myTable",
+    [
+      { vector: [3.1, 4.1], item: "foo", price: 10.0 },
+      { vector: [5.9, 26.5], item: "bar", price: 20.0 },
+    ],
+    { mode: "overwrite" },
+  );
+  // --8<-- [end:create_table]
+
+  const data = [
+    { vector: [3.1, 4.1], item: "foo", price: 10.0 },
+    { vector: [5.9, 26.5], item: "bar", price: 20.0 },
+  ];
+
+  {
+    // --8<-- [start:create_table_exists_ok]
+    const tbl = await db.createTable("myTable", data, {
+      existOk: true, // do not fail when "myTable" already exists
+    });
+    // --8<-- [end:create_table_exists_ok]
+  }
+  {
+    // --8<-- [start:create_table_overwrite]
+    const _tbl = await db.createTable("myTable", data, {
+      mode: "overwrite",
+    });
+    // --8<-- [end:create_table_overwrite]
+  }
+}
+await db.dropTable("myTable"); // next block re-creates it in "create" mode
+{
+  // --8<-- [start:create_table_with_schema]
+  const schema = new arrow.Schema([
+    new arrow.Field(
+      "vector",
+      new arrow.FixedSizeList(
+        2,
+        new arrow.Field("item", new arrow.Float32(), true),
+      ),
+    ),
+    new arrow.Field("item", new arrow.Utf8(), true),
+    new arrow.Field("price", new arrow.Float32(), true),
+  ]);
+  const data = [
+    { vector: [3.1, 4.1], item: "foo", price: 10.0 },
+    { vector: [5.9, 26.5], item: "bar", price: 20.0 },
+  ];
+  const _tbl = await db.createTable("myTable", data, {
+    schema,
+  });
+  // --8<-- [end:create_table_with_schema]
+}
+
+{
+  // --8<-- [start:create_empty_table]
+
+  const schema = new arrow.Schema([
+    new arrow.Field("id", new arrow.Int32()),
+    new arrow.Field("name", new arrow.Utf8()),
+  ]);
+
+  const empty_tbl = await db.createEmptyTable("empty_table", schema);
+  // --8<-- [end:create_empty_table]
+}
+{
+  // --8<-- [start:open_table]
+  const _tbl = await db.openTable("myTable");
+  // --8<-- [end:open_table]
+}
+
+{
+  // --8<-- [start:table_names]
+  const tableNames = await db.tableNames();
+  console.log(tableNames);
+  // --8<-- [end:table_names]
+}
+
+const tbl = await db.openTable("myTable");
+{
+  // --8<-- [start:add_data]
+  const data = [
+    { vector: [1.3, 1.4], item: "fizz", price: 100.0 },
+    { vector: [9.5, 56.2], item: "buzz", price: 200.0 },
+  ];
+  await tbl.add(data);
+  // --8<-- [end:add_data]
+}
+{
+  // --8<-- [start:vector_search]
+  const _res = await tbl.search([100, 100]).limit(2).toArray(); // toArray() is async
+  // --8<-- [end:vector_search]
+}
+{
+  const data = Array.from({ length: 1000 })
+    .fill(null)
+    .map(() => ({
+      vector: [Math.random(), Math.random()],
+      item: "autogen",
+      price: Math.round(Math.random() * 100),
+    }));
+
+  await tbl.add(data);
+}
+
+// --8<-- [start:create_index]
+await tbl.createIndex("vector");
+// --8<-- [end:create_index]
+
+// --8<-- [start:delete_rows]
+await tbl.delete('item = "fizz"');
+// --8<-- [end:delete_rows]
+
+// --8<-- [start:drop_table]
+await db.dropTable("myTable");
+// --8<-- [end:drop_table]
+await db.dropTable("empty_table");
+
+{
+  // --8<-- [start:create_f16_table]
+  const db = await lancedb.connect("/tmp/lancedb");
+  const dim = 16;
+  const total = 10;
+  const f16Schema = new Schema([
+    new Field("id", new Int32()),
+    new Field(
+      "vector",
+      new FixedSizeList(dim, new Field("item", new Float16(), true)),
+      false,
+    ),
+  ]);
+  const data = lancedb.makeArrowTable(
+    Array.from(Array(total), (_, i) => ({
+      id: i,
+      vector: Array.from(Array(dim), Math.random),
+    })),
+    { schema: f16Schema },
+  );
+  const _table = await db.createTable("f16_tbl", data);
+  // --8<-- [end:create_f16_table]
+  await db.dropTable("f16_tbl");
+}
--- a/nodejs/examples/custom_embedding_function.test.ts
+++ b/nodejs/examples/custom_embedding_function.test.ts
@@ -1,76 +0,0 @@
-import { FeatureExtractionPipeline, pipeline } from "@huggingface/transformers";
-// SPDX-License-Identifier: Apache-2.0
-// SPDX-FileCopyrightText: Copyright The LanceDB Authors
-import { expect, test } from "@jest/globals";
-// --8<-- [start:imports]
-import * as lancedb from "@lancedb/lancedb";
-import {
-  LanceSchema,
-  TextEmbeddingFunction,
-  getRegistry,
-  register,
-} from "@lancedb/lancedb/embedding";
-// --8<-- [end:imports]
-import { withTempDirectory } from "./util.ts";
-
-// --8<-- [start:embedding_impl]
-@register("sentence-transformers")
-class SentenceTransformersEmbeddings extends TextEmbeddingFunction {
-  name = "Xenova/all-miniLM-L6-v2";
-  #ndims!: number;
-  extractor!: FeatureExtractionPipeline;
-
-  async init() {
-    this.extractor = await pipeline("feature-extraction", this.name, {
-      dtype: "fp32",
-    });
-    this.#ndims = await this.generateEmbeddings(["hello"]).then(
-      (e) => e[0].length,
-    );
-  }
-
-  ndims() {
-    return this.#ndims;
-  }
-
-  toJSON() {
-    return {
-      name: this.name,
-    };
-  }
-  async generateEmbeddings(texts: string[]) {
-    const output = await this.extractor(texts, {
-      pooling: "mean",
-      normalize: true,
-    });
-    return output.tolist();
-  }
-}
-// -8<-- [end:embedding_impl]
-
-test("Registry examples", async () => {
-  await withTempDirectory(async (databaseDir) => {
-    // --8<-- [start:call_custom_function]
-    const registry = getRegistry();
-
-    const sentenceTransformer = await registry
-      .get<SentenceTransformersEmbeddings>("sentence-transformers")!
-      .create();
-
-    const schema = LanceSchema({
-      vector: sentenceTransformer.vectorField(),
-      text: sentenceTransformer.sourceField(),
-    });
-
-    const db = await lancedb.connect(databaseDir);
-    const table = await db.createEmptyTable("table", schema, {
-      mode: "overwrite",
-    });
-
-    await table.add([{ text: "hello" }, { text: "world" }]);
-
-    const results = await table.search("greeting").limit(1).toArray();
-    // -8<-- [end:call_custom_function]
-    expect(results.length).toBe(1);
-  });
-}, 100_000);
--- a/nodejs/examples/custom_embedding_function.ts
+++ b/nodejs/examples/custom_embedding_function.ts
@@ -0,0 +1,64 @@
+// --8<-- [start:imports]
+import * as lancedb from "@lancedb/lancedb";
+import {
+  LanceSchema,
+  TextEmbeddingFunction,
+  getRegistry,
+  register,
+} from "@lancedb/lancedb/embedding";
+import { pipeline } from "@xenova/transformers";
+// --8<-- [end:imports]
+
+// --8<-- [start:embedding_impl]
+@register("sentence-transformers")
+class SentenceTransformersEmbeddings extends TextEmbeddingFunction {
+  name = "Xenova/all-miniLM-L6-v2";
+  #ndims!: number;
+  extractor: any;
+
+  async init() {
+    this.extractor = await pipeline("feature-extraction", this.name);
+    this.#ndims = await this.generateEmbeddings(["hello"]).then(
+      (e) => e[0].length,
+    );
+  }
+
+  ndims() {
+    return this.#ndims;
+  }
+
+  toJSON() {
+    return {
+      name: this.name,
+    };
+  }
+  async generateEmbeddings(texts: string[]) {
+    const output = await this.extractor(texts, {
+      pooling: "mean",
+      normalize: true,
+    });
+    return output.tolist();
+  }
+}
+// --8<-- [end:embedding_impl]
+
+// --8<-- [start:call_custom_function]
+const registry = getRegistry();
+
+const sentenceTransformer = await registry
+  .get<SentenceTransformersEmbeddings>("sentence-transformers")!
+  .create();
+
+const schema = LanceSchema({
+  vector: sentenceTransformer.vectorField(),
+  text: sentenceTransformer.sourceField(),
+});
+
+const db = await lancedb.connect("/tmp/db");
+const table = await db.createEmptyTable("table", schema, { mode: "overwrite" });
+
+await table.add([{ text: "hello" }, { text: "world" }]);
+
+const results = await table.search("greeting").limit(1).toArray();
+console.log(results[0].text);
+// --8<-- [end:call_custom_function]
--- a/nodejs/examples/embedding.test.ts
+++ b/nodejs/examples/embedding.test.ts
@@ -1,96 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// SPDX-FileCopyrightText: Copyright The LanceDB Authors
-import { expect, test } from "@jest/globals";
-// --8<-- [start:imports]
-import * as lancedb from "@lancedb/lancedb";
-import "@lancedb/lancedb/embedding/openai";
-import { LanceSchema, getRegistry, register } from "@lancedb/lancedb/embedding";
-import { EmbeddingFunction } from "@lancedb/lancedb/embedding";
-import { type Float, Float32, Utf8 } from "apache-arrow";
-// --8<-- [end:imports]
-import { withTempDirectory } from "./util.ts";
-
-const openAiTest = process.env.OPENAI_API_KEY == null ? test.skip : test;
-
-openAiTest("openai embeddings", async () => {
-  await withTempDirectory(async (databaseDir) => {
-    // --8<-- [start:openai_embeddings]
-    const db = await lancedb.connect(databaseDir);
-    const func = getRegistry()
-      .get("openai")
-      ?.create({ model: "text-embedding-ada-002" }) as EmbeddingFunction;
-
-    const wordsSchema = LanceSchema({
-      text: func.sourceField(new Utf8()),
-      vector: func.vectorField(),
-    });
-    const tbl = await db.createEmptyTable("words", wordsSchema, {
-      mode: "overwrite",
-    });
-    await tbl.add([{ text: "hello world" }, { text: "goodbye world" }]);
-
-    const query = "greetings";
-    const actual = (await tbl.search(query).limit(1).toArray())[0];
-    // --8<-- [end:openai_embeddings]
-    expect(actual).toHaveProperty("text");
-  });
-});
-
-test("custom embedding function", async () => {
-  await withTempDirectory(async (databaseDir) => {
-    // --8<-- [start:embedding_function]
-    const db = await lancedb.connect(databaseDir);
-
-    @register("my_embedding")
-    class MyEmbeddingFunction extends EmbeddingFunction<string> {
-      toJSON(): object {
-        return {};
-      }
-      ndims() {
-        return 3;
-      }
-      embeddingDataType(): Float {
-        return new Float32();
-      }
-      async computeQueryEmbeddings(_data: string) {
-        // This is a placeholder for a real embedding function
-        return [1, 2, 3];
-      }
-      async computeSourceEmbeddings(data: string[]) {
-        // This is a placeholder for a real embedding function
-        return Array.from({ length: data.length }).fill([
-          1, 2, 3,
-        ]) as number[][];
-      }
-    }
-
-    const func = new MyEmbeddingFunction();
-
-    const data = [{ text: "pepperoni" }, { text: "pineapple" }];
-
-    // Option 1: manually specify the embedding function
-    const table = await db.createTable("vectors", data, {
-      embeddingFunction: {
-        function: func,
-        sourceColumn: "text",
-        vectorColumn: "vector",
-      },
-      mode: "overwrite",
-    });
-
-    // Option 2: provide the embedding function through a schema
-
-    const schema = LanceSchema({
-      text: func.sourceField(new Utf8()),
-      vector: func.vectorField(),
-    });
-
-    const table2 = await db.createTable("vectors2", data, {
-      schema,
-      mode: "overwrite",
-    });
-    // --8<-- [end:embedding_function]
-    expect(await table.countRows()).toBe(2);
-    expect(await table2.countRows()).toBe(2);
-  });
-});
--- a/nodejs/examples/embedding.ts
+++ b/nodejs/examples/embedding.ts
@@ -0,0 +1,83 @@
+// --8<-- [start:imports]
+import * as lancedb from "@lancedb/lancedb";
+import { LanceSchema, getRegistry, register } from "@lancedb/lancedb/embedding";
+import { EmbeddingFunction } from "@lancedb/lancedb/embedding";
+import { type Float, Float32, Utf8 } from "apache-arrow";
+// --8<-- [end:imports]
+
+{
+  // --8<-- [start:openai_embeddings]
+
+  const db = await lancedb.connect("/tmp/db");
+  const func = getRegistry()
+    .get("openai")
+    ?.create({ model: "text-embedding-ada-002" }) as EmbeddingFunction;
+
+  const wordsSchema = LanceSchema({
+    text: func.sourceField(new Utf8()),
+    vector: func.vectorField(),
+  });
+  const tbl = await db.createEmptyTable("words", wordsSchema, {
+    mode: "overwrite",
+  });
+  await tbl.add([{ text: "hello world" }, { text: "goodbye world" }]);
+
+  const query = "greetings";
+  const actual = (await (await tbl.search(query)).limit(1).toArray())[0];
+
+  // --8<-- [end:openai_embeddings]
+  console.log("result = ", actual.text);
+}
+
+{
+  // --8<-- [start:embedding_function]
+  const db = await lancedb.connect("/tmp/db");
+
+  @register("my_embedding")
+  class MyEmbeddingFunction extends EmbeddingFunction<string> {
+    toJSON(): object {
+      return {};
+    }
+    ndims() {
+      return 3;
+    }
+    embeddingDataType(): Float {
+      return new Float32();
+    }
+    async computeQueryEmbeddings(_data: string) {
+      // This is a placeholder for a real embedding function
+      return [1, 2, 3];
+    }
+    async computeSourceEmbeddings(data: string[]) {
+      // This is a placeholder for a real embedding function
+      return Array.from({ length: data.length }).fill([1, 2, 3]) as number[][];
+    }
+  }
+
+  const func = new MyEmbeddingFunction();
+
+  const data = [{ text: "pepperoni" }, { text: "pineapple" }];
+
+  // Option 1: manually specify the embedding function
+  const table = await db.createTable("vectors", data, {
+    embeddingFunction: {
+      function: func,
+      sourceColumn: "text",
+      vectorColumn: "vector",
+    },
+    mode: "overwrite",
+  });
+
+  // Option 2: provide the embedding function through a schema
+
+  const schema = LanceSchema({
+    text: func.sourceField(new Utf8()),
+    vector: func.vectorField(),
+  });
+
+  const table2 = await db.createTable("vectors2", data, {
+    schema,
+    mode: "overwrite",
+  });
+  // --8<-- [end:embedding_function]
+}
--- a/nodejs/examples/filtering.test.ts
+++ b/nodejs/examples/filtering.test.ts
@@ -1,42 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// SPDX-FileCopyrightText: Copyright The LanceDB Authors
-import { expect, test } from "@jest/globals";
-import * as lancedb from "@lancedb/lancedb";
-import { withTempDirectory } from "./util.ts";
-
-test("filtering examples", async () => {
-  await withTempDirectory(async (databaseDir) => {
-    const db = await lancedb.connect(databaseDir);
-
-    const data = Array.from({ length: 10_000 }, (_, i) => ({
-      vector: Array(1536).fill(i),
-      id: i,
-      item: `item ${i}`,
-      strId: `${i}`,
-    }));
-
-    const tbl = await db.createTable("myVectors", data, { mode: "overwrite" });
-
-    // --8<-- [start:search]
-    const _result = await tbl
-      .search(Array(1536).fill(0.5))
-      .limit(1)
-      .where("id = 10")
-      .toArray();
-    // --8<-- [end:search]
-
-    // --8<-- [start:vec_search]
-    const result = await (
-      tbl.search(Array(1536).fill(0)) as lancedb.VectorQuery
-    )
-      .where("(item IN ('item 0', 'item 2')) AND (id > 10)")
-      .postfilter()
-      .toArray();
-    // --8<-- [end:vec_search]
-    expect(result.length).toBe(0);
-
-    // --8<-- [start:sql_search]
-    await tbl.query().where("id = 10").limit(10).toArray();
-    // --8<-- [end:sql_search]
-  });
-});
--- a/nodejs/examples/filtering.ts
+++ b/nodejs/examples/filtering.ts
@@ -0,0 +1,34 @@
+import * as lancedb from "@lancedb/lancedb";
+
+const db = await lancedb.connect("data/sample-lancedb");
+
+const data = Array.from({ length: 10_000 }, (_, i) => ({
+  vector: Array(1536).fill(i),
+  id: i,
+  item: `item ${i}`,
+  strId: `${i}`,
+}));
+
+const tbl = await db.createTable("myVectors", data, { mode: "overwrite" });
+
+// --8<-- [start:search]
+const _result = await tbl
+  .search(Array(1536).fill(0.5))
+  .limit(1)
+  .where("id = 10")
+  .toArray();
+// --8<-- [end:search]
+
+// --8<-- [start:vec_search]
+await tbl
+  .search(Array(1536).fill(0))
+  .where("(item IN ('item 0', 'item 2')) AND (id > 10)")
+  .postfilter()
+  .toArray();
+// --8<-- [end:vec_search]
+
+// --8<-- [start:sql_search]
+await tbl.query().where("id = 10").limit(10).toArray();
+// --8<-- [end:sql_search]
+
+console.log("SQL search: done");
--- a/nodejs/examples/full_text_search.test.ts
+++ b/nodejs/examples/full_text_search.test.ts
@@ -1,45 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// SPDX-FileCopyrightText: Copyright The LanceDB Authors
-import { expect, test } from "@jest/globals";
-import * as lancedb from "@lancedb/lancedb";
-import { withTempDirectory } from "./util.ts";
-
-test("full text search", async () => {
-  await withTempDirectory(async (databaseDir) => {
-    const db = await lancedb.connect(databaseDir);
-
-    const words = [
-      "apple",
-      "banana",
-      "cherry",
-      "date",
-      "elderberry",
-      "fig",
-      "grape",
-    ];
-
-    const data = Array.from({ length: 10_000 }, (_, i) => ({
-      vector: Array(1536).fill(i),
-      id: i,
-      item: `item ${i}`,
-      strId: `${i}`,
-      doc: words[i % words.length],
-    }));
-
-    const tbl = await db.createTable("myVectors", data, { mode: "overwrite" });
-
-    await tbl.createIndex("doc", {
-      config: lancedb.Index.fts(),
-    });
-
-    // --8<-- [start:full_text_search]
-    const result = await tbl
-      .query()
-      .nearestToText("apple")
-      .select(["id", "doc"])
-      .limit(10)
-      .toArray();
-    expect(result.length).toBe(10);
-    // --8<-- [end:full_text_search]
-  });
-});
--- a/nodejs/examples/full_text_search.ts
+++ b/nodejs/examples/full_text_search.ts
@@ -0,0 +1,52 @@
+// Copyright 2024 Lance Developers.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+import * as lancedb from "@lancedb/lancedb";
+
+const db = await lancedb.connect("data/sample-lancedb");
+
+const words = [
+  "apple",
+  "banana",
+  "cherry",
+  "date",
+  "elderberry",
+  "fig",
+  "grape",
+];
+
+const data = Array.from({ length: 10_000 }, (_, i) => ({
+  vector: Array(1536).fill(i),
+  id: i,
+  item: `item ${i}`,
+  strId: `${i}`,
+  doc: words[i % words.length],
+}));
+
+const tbl = await db.createTable("myVectors", data, { mode: "overwrite" });
+
+await tbl.createIndex("doc", {
+  config: lancedb.Index.fts(),
+});
+
+// --8<-- [start:full_text_search]
+let result = await tbl
+  .search("apple")
+  .select(["id", "doc"])
+  .limit(10)
+  .toArray();
+console.log(result);
+// --8<-- [end:full_text_search]
+
+console.log("Full text search: done");
--- a/nodejs/examples/jest.config.cjs
+++ b/nodejs/examples/jest.config.cjs
@@ -1,6 +0,0 @@
-/** @type {import('ts-jest').JestConfigWithTsJest} */
-module.exports = {
-  preset: "ts-jest",
-  testEnvironment: "node",
-  testPathIgnorePatterns: ["./dist"],
-};
--- a/nodejs/examples/jsconfig.json
+++ b/nodejs/examples/jsconfig.json
@@ -0,0 +1,27 @@
+{
+  "compilerOptions": {
+    // Enable latest features
+    "lib": ["ESNext", "DOM"],
+    "target": "ESNext",
+    "module": "ESNext",
+    "moduleDetection": "force",
+    "jsx": "react-jsx",
+    "allowJs": true,
+
+    // Bundler mode
+    "moduleResolution": "bundler",
+    "allowImportingTsExtensions": true,
+    "verbatimModuleSyntax": true,
+    "noEmit": true,
+
+    // Best practices
+    "strict": true,
+    "skipLibCheck": true,
+    "noFallthroughCasesInSwitch": true,
+
+    // Some stricter flags (disabled by default)
+    "noUnusedLocals": false,
+    "noUnusedParameters": false,
+    "noPropertyAccessFromIndexSignature": false
+  }
+}
--- a/nodejs/examples/package-lock.json
+++ b/nodejs/examples/package-lock.json
--- a/nodejs/examples/package.json
+++ b/nodejs/examples/package.json
@@ -5,29 +5,24 @@
  "main": "index.js",
  "type": "module",
  "scripts": {
-    "//1": "--experimental-vm-modules is needed to run jest with sentence-transformers",
-    "//2": "--testEnvironment is needed to run jest with sentence-transformers",
-    "//3": "See: https://github.com/huggingface/transformers.js/issues/57",
-    "test": "node --experimental-vm-modules node_modules/.bin/jest --testEnvironment jest-environment-node-single-context --verbose",
-    "lint": "biome check *.ts && biome format *.ts",
-    "lint-ci": "biome ci .",
-    "lint-fix": "biome check --write *.ts && npm run format",
-    "format": "biome format --write *.ts"
+    "test": "echo \"Error: no test specified\" && exit 1"
  },
  "author": "Lance Devs",
  "license": "Apache-2.0",
  "dependencies": {
-    "@huggingface/transformers": "^3.0.2",
-    "@lancedb/lancedb": "file:../dist",
-    "openai": "^4.29.2",
-    "sharp": "^0.33.5"
+    "@lancedb/lancedb": "file:../",
+    "@xenova/transformers": "^2.17.2"
  },
  "devDependencies": {
-    "@biomejs/biome": "^1.7.3",
-    "@jest/globals": "^29.7.0",
-    "jest": "^29.7.0",
-    "jest-environment-node-single-context": "^29.4.0",
-    "ts-jest": "^29.2.5",
    "typescript": "^5.5.4"
+  },
+  "compilerOptions": {
+    "target": "ESNext",
+    "module": "ESNext",
+    "moduleResolution": "Node",
+    "strict": true,
+    "esModuleInterop": true,
+    "skipLibCheck": true,
+    "forceConsistentCasingInFileNames": true
  }
 }
--- a/nodejs/examples/search.test.ts
+++ b/nodejs/examples/search.test.ts
@@ -1,42 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// SPDX-FileCopyrightText: Copyright The LanceDB Authors
-import { expect, test } from "@jest/globals";
-// --8<-- [start:import]
-import * as lancedb from "@lancedb/lancedb";
-// --8<-- [end:import]
-import { withTempDirectory } from "./util.ts";
-
-test("full text search", async () => {
-  await withTempDirectory(async (databaseDir) => {
-    {
-      const db = await lancedb.connect(databaseDir);
-
-      const data = Array.from({ length: 10_000 }, (_, i) => ({
-        vector: Array(128).fill(i),
-        id: `${i}`,
-        content: "",
-        longId: `${i}`,
-      }));
-
-      await db.createTable("my_vectors", data);
-    }
-
-    // --8<-- [start:search1]
-    const db = await lancedb.connect(databaseDir);
-    const tbl = await db.openTable("my_vectors");
-
-    const results1 = await tbl.search(Array(128).fill(1.2)).limit(10).toArray();
-    // --8<-- [end:search1]
-    expect(results1.length).toBe(10);
-
-    // --8<-- [start:search2]
-    const results2 = await (
-      tbl.search(Array(128).fill(1.2)) as lancedb.VectorQuery
-    )
-      .distanceType("cosine")
-      .limit(10)
-      .toArray();
-    // --8<-- [end:search2]
-    expect(results2.length).toBe(10);
-  });
-});
--- a/nodejs/examples/search.ts
+++ b/nodejs/examples/search.ts
@@ -0,0 +1,38 @@
+import * as fs from "node:fs";
+// --8<-- [start:import]
+import * as lancedb from "@lancedb/lancedb";
+// --8<-- [end:import]
+
+async function setup() {
+  fs.rmSync("data/sample-lancedb", { recursive: true, force: true });
+  const db = await lancedb.connect("data/sample-lancedb");
+
+  const data = Array.from({ length: 10_000 }, (_, i) => ({
+    vector: Array(1536).fill(i),
+    id: `${i}`,
+    content: "",
+    longId: `${i}`,
+  }));
+
+  await db.createTable("my_vectors", data);
+}
+
+await setup();
+
+// --8<-- [start:search1]
+const db = await lancedb.connect("data/sample-lancedb");
+const tbl = await db.openTable("my_vectors");
+
+const _results1 = await tbl.search(Array(1536).fill(1.2)).limit(10).toArray();
+// --8<-- [end:search1]
+
+// --8<-- [start:search2]
+const _results2 = await tbl
+  .search(Array(1536).fill(1.2))
+  .distanceType("cosine")
+  .limit(10)
+  .toArray();
+console.log(_results2);
+// --8<-- [end:search2]
+
+console.log("search: done");
--- a/nodejs/examples/sentence-transformers.js
+++ b/nodejs/examples/sentence-transformers.js
@@ -0,0 +1,50 @@
+import * as lancedb from "@lancedb/lancedb";
+
+import { LanceSchema, getRegistry } from "@lancedb/lancedb/embedding";
+import { Utf8 } from "apache-arrow";
+
+const db = await lancedb.connect("/tmp/db");
+const func = await getRegistry().get("huggingface").create();
+
+const facts = [
+  "Albert Einstein was a theoretical physicist.",
+  "The capital of France is Paris.",
+  "The Great Wall of China is one of the Seven Wonders of the World.",
+  "Python is a popular programming language.",
+  "Mount Everest is the highest mountain in the world.",
+  "Leonardo da Vinci painted the Mona Lisa.",
+  "Shakespeare wrote Hamlet.",
+  "The human body has 206 bones.",
+  "The speed of light is approximately 299,792 kilometers per second.",
+  "Water boils at 100 degrees Celsius.",
+  "The Earth orbits the Sun.",
+  "The Pyramids of Giza are located in Egypt.",
+  "Coffee is one of the most popular beverages in the world.",
+  "Tokyo is the capital city of Japan.",
+  "Photosynthesis is the process by which plants make their food.",
+  "The Pacific Ocean is the largest ocean on Earth.",
+  "Mozart was a prolific composer of classical music.",
+  "The Internet is a global network of computers.",
+  "Basketball is a sport played with a ball and a hoop.",
+  "The first computer virus was created in 1983.",
+  "Artificial neural networks are inspired by the human brain.",
+  "Deep learning is a subset of machine learning.",
+  "IBM's Watson won Jeopardy! in 2011.",
+  "The first computer programmer was Ada Lovelace.",
+  "The first chatbot was ELIZA, created in the 1960s.",
+].map((text) => ({ text }));
+
+const factsSchema = LanceSchema({
+  text: func.sourceField(new Utf8()),
+  vector: func.vectorField(),
+});
+
+const tbl = await db.createTable("facts", facts, {
+  mode: "overwrite",
+  schema: factsSchema,
+});
+
+const query = "How many bones are in the human body?";
+const actual = await tbl.search(query).limit(1).toArray();
+
+console.log("Answer: ", actual[0]["text"]);
--- a/nodejs/examples/sentence-transformers.test.ts
+++ b/nodejs/examples/sentence-transformers.test.ts
@@ -1,59 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// SPDX-FileCopyrightText: Copyright The LanceDB Authors
-import { expect, test } from "@jest/globals";
-import { withTempDirectory } from "./util.ts";
-
-import * as lancedb from "@lancedb/lancedb";
-import "@lancedb/lancedb/embedding/transformers";
-import { LanceSchema, getRegistry } from "@lancedb/lancedb/embedding";
-import { Utf8 } from "apache-arrow";
-
-test("full text search", async () => {
-  await withTempDirectory(async (databaseDir) => {
-    const db = await lancedb.connect(databaseDir);
-    const func = await getRegistry().get("huggingface").create();
-
-    const facts = [
-      "Albert Einstein was a theoretical physicist.",
-      "The capital of France is Paris.",
-      "The Great Wall of China is one of the Seven Wonders of the World.",
-      "Python is a popular programming language.",
-      "Mount Everest is the highest mountain in the world.",
-      "Leonardo da Vinci painted the Mona Lisa.",
-      "Shakespeare wrote Hamlet.",
-      "The human body has 206 bones.",
-      "The speed of light is approximately 299,792 kilometers per second.",
-      "Water boils at 100 degrees Celsius.",
-      "The Earth orbits the Sun.",
-      "The Pyramids of Giza are located in Egypt.",
-      "Coffee is one of the most popular beverages in the world.",
-      "Tokyo is the capital city of Japan.",
-      "Photosynthesis is the process by which plants make their food.",
-      "The Pacific Ocean is the largest ocean on Earth.",
-      "Mozart was a prolific composer of classical music.",
-      "The Internet is a global network of computers.",
-      "Basketball is a sport played with a ball and a hoop.",
-      "The first computer virus was created in 1983.",
-      "Artificial neural networks are inspired by the human brain.",
-      "Deep learning is a subset of machine learning.",
-      "IBM's Watson won Jeopardy! in 2011.",
-      "The first computer programmer was Ada Lovelace.",
-      "The first chatbot was ELIZA, created in the 1960s.",
-    ].map((text) => ({ text }));
-
-    const factsSchema = LanceSchema({
-      text: func.sourceField(new Utf8()),
-      vector: func.vectorField(),
-    });
-
-    const tbl = await db.createTable("facts", facts, {
-      mode: "overwrite",
-      schema: factsSchema,
-    });
-
-    const query = "How many bones are in the human body?";
-    const actual = await tbl.search(query).limit(1).toArray();
-
-    expect(actual[0]["text"]).toBe("The human body has 206 bones.");
-  });
-});
--- a/nodejs/examples/tsconfig.json
+++ b/nodejs/examples/tsconfig.json
@@ -1,17 +0,0 @@
-{
-  "include": ["*.test.ts"],
-  "compilerOptions": {
-    "target": "es2022",
-    "module": "NodeNext",
-    "declaration": true,
-    "outDir": "./dist",
-    "strict": true,
-    "allowJs": true,
-    "resolveJsonModule": true,
-    "emitDecoratorMetadata": true,
-    "experimentalDecorators": true,
-    "moduleResolution": "NodeNext",
-    "allowImportingTsExtensions": true,
-    "emitDeclarationOnly": true
-  }
-}
--- a/nodejs/examples/util.ts
+++ b/nodejs/examples/util.ts
@@ -1,16 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-// SPDX-FileCopyrightText: Copyright The LanceDB Authors
-import * as fs from "fs";
-import { tmpdir } from "os";
-import * as path from "path";
-
-export async function withTempDirectory(
-  fn: (tempDir: string) => Promise<void>,
-) {
-  const tmpDirPath = fs.mkdtempSync(path.join(tmpdir(), "temp-dir-"));
-  try {
-    await fn(tmpDirPath);
-  } finally {
-    fs.rmSync(tmpDirPath, { recursive: true });
-  }
-}
--- a/nodejs/jest.config.js
+++ b/nodejs/jest.config.js
@@ -4,5 +4,4 @@ module.exports = {
  testEnvironment: "node",
  moduleDirectories: ["node_modules", "./dist"],
  moduleFileExtensions: ["js", "ts"],
-  modulePathIgnorePatterns: ["<rootDir>/examples/"],
 };
--- a/nodejs/lancedb/embedding/transformers.ts
+++ b/nodejs/lancedb/embedding/transformers.ts
@@ -47,8 +47,8 @@ export class TransformersEmbeddingFunction extends EmbeddingFunction<
  string,
  Partial<XenovaTransformerOptions>
 > {
-  #model?: import("@huggingface/transformers").PreTrainedModel;
-  #tokenizer?: import("@huggingface/transformers").PreTrainedTokenizer;
+  #model?: import("@xenova/transformers").PreTrainedModel;
+  #tokenizer?: import("@xenova/transformers").PreTrainedTokenizer;
  #modelName: XenovaTransformerOptions["model"];
  #initialized = false;
  #tokenizerOptions: XenovaTransformerOptions["tokenizerOptions"];
@@ -92,19 +92,18 @@ export class TransformersEmbeddingFunction extends EmbeddingFunction<
    try {
      // SAFETY:
      // since typescript transpiles `import` to `require`, we need to do this in an unsafe way
-      // We can't use `require` because `@huggingface/transformers` is an ESM module
+      // We can't use `require` because `@xenova/transformers` is an ESM module
      // and we can't use `import` directly because typescript will transpile it to `require`.
      // and we want to remain compatible with both ESM and CJS modules
      // so we use `eval` to bypass typescript for this specific import.
-      transformers = await eval('import("@huggingface/transformers")');
+      transformers = await eval('import("@xenova/transformers")');
    } catch (e) {
-      throw new Error(`error loading @huggingface/transformers\nReason: ${e}`);
+      throw new Error(`error loading @xenova/transformers\nReason: ${e}`);
    }

    try {
      this.#model = await transformers.AutoModel.from_pretrained(
        this.#modelName,
-        { dtype: "fp32" },
      );
    } catch (e) {
      throw new Error(
@@ -129,8 +128,7 @@ export class TransformersEmbeddingFunction extends EmbeddingFunction<
    } else {
      const config = this.#model!.config;

-      // biome-ignore lint/style/useNamingConvention: we don't control this name.
-      const ndims = (config as unknown as { hidden_size: number }).hidden_size;
+      const ndims = config["hidden_size"];
      if (!ndims) {
        throw new Error(
          "hidden_size not found in model config, you may need to manually specify the embedding dimensions. ",
@@ -185,7 +183,7 @@ export class TransformersEmbeddingFunction extends EmbeddingFunction<
 }

 const tensorDiv = (
-  src: import("@huggingface/transformers").Tensor,
+  src: import("@xenova/transformers").Tensor,
  divBy: number,
 ) => {
  for (let i = 0; i < src.data.length; ++i) {
--- a/nodejs/lancedb/query.ts
+++ b/nodejs/lancedb/query.ts
@@ -492,42 +492,6 @@ export class VectorQuery extends QueryBase<NativeVectorQuery> {
    super.doCall((inner) => inner.bypassVectorIndex());
    return this;
  }
-
-  /*
-   * Add a query vector to the search
-   *
-   * This method can be called multiple times to add multiple query vectors
-   * to the search. If multiple query vectors are added, then they will be searched
-   * in parallel, and the results will be concatenated. A column called `query_index`
-   * will be added to indicate the index of the query vector that produced the result.
-   *
-   * Performance wise, this is equivalent to running multiple queries concurrently.
-   */
-  addQueryVector(vector: IntoVector): VectorQuery {
-    if (vector instanceof Promise) {
-      const res = (async () => {
-        try {
-          const v = await vector;
-          const arr = Float32Array.from(v);
-          //
-          // biome-ignore lint/suspicious/noExplicitAny: we need to get the `inner`, but js has no package scoping
-          const value: any = this.addQueryVector(arr);
-          const inner = value.inner as
-            | NativeVectorQuery
-            | Promise<NativeVectorQuery>;
-          return inner;
-        } catch (e) {
-          return Promise.reject(e);
-        }
-      })();
-      return new VectorQuery(res);
-    } else {
-      super.doCall((inner) => {
-        inner.addQueryVector(Float32Array.from(vector));
-      });
-      return this;
-    }
-  }
 }

 /** A builder for LanceDB queries. */
@@ -607,9 +571,4 @@ export class Query extends QueryBase<NativeQuery> {
      return new VectorQuery(vectorQuery);
    }
  }
-
-  nearestToText(query: string, columns?: string[]): Query {
-    this.doCall((inner) => inner.fullTextSearch(query, columns));
-    return this;
-  }
 }
--- a/nodejs/package-lock.json
+++ b/nodejs/package-lock.json
--- a/nodejs/package.json
+++ b/nodejs/package.json
@@ -85,7 +85,7 @@
    "reflect-metadata": "^0.2.2"
  },
  "optionalDependencies": {
-    "@huggingface/transformers": "^3.0.2",
+    "@xenova/transformers": ">=2.17 < 3",
    "openai": "^4.29.2"
  },
  "peerDependencies": {
--- a/nodejs/src/query.rs
+++ b/nodejs/src/query.rs
@@ -135,16 +135,6 @@ impl VectorQuery {
        self.inner = self.inner.clone().column(&column);
    }

-    #[napi]
-    pub fn add_query_vector(&mut self, vector: Float32Array) -> Result<()> {
-        self.inner = self
-            .inner
-            .clone()
-            .add_query_vector(vector.as_ref())
-            .default_error()?;
-        Ok(())
-    }
-
    #[napi]
    pub fn distance_type(&mut self, distance_type: String) -> napi::Result<()> {
        let distance_type = parse_distance_type(distance_type)?;
--- a/nodejs/tsconfig.json
+++ b/nodejs/tsconfig.json
@@ -12,7 +12,7 @@
    "experimentalDecorators": true,
    "moduleResolution": "Node"
  },
-  "exclude": ["./dist/*", "./examples/*"],
+  "exclude": ["./dist/*"],
  "typedocOptions": {
    "entryPoints": ["lancedb/index.ts"],
    "out": "../docs/src/javascript/",
--- a/python/.bumpversion.toml
+++ b/python/.bumpversion.toml
@@ -1,5 +1,5 @@
 [tool.bumpversion]
-current_version = "0.16.0-beta.1"
+current_version = "0.16.0-beta.0"
 parse = """(?x)
    (?P<major>0|[1-9]\\d*)\\.
    (?P<minor>0|[1-9]\\d*)\\.
--- a/python/Cargo.toml
+++ b/python/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "lancedb-python"
-version = "0.16.0-beta.1"
+version = "0.16.0-beta.0"
 edition.workspace = true
 description = "Python bindings for LanceDB"
 license.workspace = true
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -4,7 +4,7 @@ name = "lancedb"
 dependencies = [
    "deprecation",
    "nest-asyncio~=1.0",
-    "pylance==0.19.2",
+    "pylance==0.19.2-beta.3",
    "tqdm>=4.27.0",
    "pydantic>=1.10",
    "packaging",
--- a/python/python/lancedb/embeddings/registry.py
+++ b/python/python/lancedb/embeddings/registry.py
@@ -1,6 +1,15 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright The LanceDB Authors
-
+#  Copyright (c) 2023. LanceDB Developers
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
 import json
 from typing import Dict, Optional

@@ -161,7 +170,7 @@ def register(name):
    return __REGISTRY__.get_instance().register(name)


-def get_registry() -> EmbeddingFunctionRegistry:
+def get_registry():
    """
    Utility function to get the global instance of the registry

--- a/python/python/lancedb/index.py
+++ b/python/python/lancedb/index.py
@@ -110,7 +110,16 @@ class FTS:
        remove_stop_words: bool = False,
        ascii_folding: bool = False,
    ):
-        self._inner = LanceDbIndex.fts(with_position=with_position)
+        self._inner = LanceDbIndex.fts(
+            with_position=with_position,
+            base_tokenizer=base_tokenizer,
+            language=language,
+            max_token_length=max_token_length,
+            lower_case=lower_case,
+            stem=stem,
+            remove_stop_words=remove_stop_words,
+            ascii_folding=ascii_folding,
+        )


 class HnswPq:
--- a/python/python/lancedb/query.py
+++ b/python/python/lancedb/query.py
@@ -943,16 +943,12 @@ class LanceFtsQueryBuilder(LanceQueryBuilder):

 class LanceEmptyQueryBuilder(LanceQueryBuilder):
    def to_arrow(self) -> pa.Table:
-        query = Query(
+        ds = self._table.to_lance()
+        return ds.to_table(
            columns=self._columns,
            filter=self._where,
-            k=self._limit or 10,
-            with_row_id=self._with_row_id,
-            vector=[],
-            # not actually respected in remote query
-            offset=self._offset or 0,
+            limit=self._limit,
        )
-        return self._table._execute_query(query).read_all()

    def rerank(self, reranker: Reranker) -> LanceEmptyQueryBuilder:
        """Rerank the results using the specified reranker.
@@ -1495,7 +1491,7 @@ class AsyncQuery(AsyncQueryBase):
        return pa.array(vec)

    def nearest_to(
-        self, query_vector: Optional[Union[VEC, Tuple, List[VEC]]] = None
+        self, query_vector: Optional[Union[VEC, Tuple]] = None
    ) -> AsyncVectorQuery:
        """
        Find the nearest vectors to the given query vector.
@@ -1533,30 +1529,10 @@ class AsyncQuery(AsyncQueryBase):

        Vector searches always have a [limit][].  If `limit` has not been called then
        a default `limit` of 10 will be used.
-
-        Typically, a single vector is passed in as the query. However, you can also
-        pass in multiple vectors.  This can be useful if you want to find the nearest
-        vectors to multiple query vectors. This is not expected to be faster than
-        making multiple queries concurrently; it is just a convenience method.
-        If multiple vectors are passed in then an additional column `query_index`
-        will be added to the results.  This column will contain the index of the
-        query vector that the result is nearest to.
        """
-        if (
-            isinstance(query_vector, list)
-            and len(query_vector) > 0
-            and not isinstance(query_vector[0], (float, int))
-        ):
-            # multiple have been passed
-            query_vectors = [AsyncQuery._query_vec_to_array(v) for v in query_vector]
-            new_self = self._inner.nearest_to(query_vectors[0])
-            for v in query_vectors[1:]:
-                new_self.add_query_vector(v)
-            return AsyncVectorQuery(new_self)
-        else:
-            return AsyncVectorQuery(
-                self._inner.nearest_to(AsyncQuery._query_vec_to_array(query_vector))
-            )
+        return AsyncVectorQuery(
+            self._inner.nearest_to(AsyncQuery._query_vec_to_array(query_vector))
+        )

    def nearest_to_text(
        self, query: str, columns: Union[str, List[str]] = []
--- a/python/python/lancedb/remote/table.py
+++ b/python/python/lancedb/remote/table.py
@@ -132,8 +132,25 @@ class RemoteTable(Table):
        *,
        replace: bool = False,
        with_position: bool = True,
+        # tokenizer configs:
+        base_tokenizer: str = "simple",
+        language: str = "English",
+        max_token_length: Optional[int] = 40,
+        lower_case: bool = True,
+        stem: bool = False,
+        remove_stop_words: bool = False,
+        ascii_folding: bool = False,
    ):
-        config = FTS(with_position=with_position)
+        config = FTS(
+            with_position=with_position,
+            base_tokenizer=base_tokenizer,
+            language=language,
+            max_token_length=max_token_length,
+            lower_case=lower_case,
+            stem=stem,
+            remove_stop_words=remove_stop_words,
+            ascii_folding=ascii_folding,
+        )
        self._loop.run_until_complete(
            self._table.create_index(column, config=config, replace=replace)
        )
@@ -327,6 +344,10 @@ class RemoteTable(Table):
            - and also the "_distance" column which is the distance between the query
            vector and the returned vector.
        """
+        # The empty query builder is not supported in SaaS, so raise an error.
+        if query is None and query_type != "hybrid":
+            raise ValueError("Empty query is not supported")
+
        return LanceQueryBuilder.create(
            self,
            query,
--- a/python/python/lancedb/rerankers/voyageai.py
+++ b/python/python/lancedb/rerankers/voyageai.py
@@ -13,7 +13,7 @@

 import os
 from functools import cached_property
-from typing import Optional
+from typing import Union, Optional

 import pyarrow as pa

--- a/python/python/lancedb/table.py
+++ b/python/python/lancedb/table.py
@@ -73,21 +73,6 @@ pl = safe_import_polars()
 QueryType = Literal["vector", "fts", "hybrid", "auto"]


-def _pd_schema_without_embedding_funcs(
-    schema: Optional[pa.Schema], columns: List[str]
-) -> Optional[pa.Schema]:
-    """Return a schema without any embedding function columns"""
-    if schema is None:
-        return None
-    embedding_functions = EmbeddingFunctionRegistry.get_instance().parse_functions(
-        schema.metadata
-    )
-    if not embedding_functions:
-        return schema
-    columns = set(columns)
-    return pa.schema([field for field in schema if field.name in columns])
-
-
 def _coerce_to_table(data, schema: Optional[pa.Schema] = None) -> pa.Table:
    if _check_for_hugging_face(data):
        # Huggingface datasets
@@ -118,10 +103,10 @@ def _coerce_to_table(data, schema: Optional[pa.Schema] = None) -> pa.Table:
        elif isinstance(data[0], pa.RecordBatch):
            return pa.Table.from_batches(data, schema=schema)
        else:
-            return pa.Table.from_pylist(data, schema=schema)
+            return pa.Table.from_pylist(data)
    elif _check_for_pandas(data) and isinstance(data, pd.DataFrame):
-        raw_schema = _pd_schema_without_embedding_funcs(schema, data.columns.to_list())
-        table = pa.Table.from_pandas(data, preserve_index=False, schema=raw_schema)
+        # Do not add schema here, since the schema may contain the vector column
+        table = pa.Table.from_pandas(data, preserve_index=False)
        # Do not serialize Pandas metadata
        meta = table.schema.metadata if table.schema.metadata is not None else {}
        meta = {k: v for k, v in meta.items() if k != b"pandas"}
@@ -187,8 +172,6 @@ def sanitize_create_table(
        schema = schema.to_arrow_schema()

    if data is not None:
-        if metadata is None and schema is not None:
-            metadata = schema.metadata
        data, schema = _sanitize_data(
            data,
            schema,
--- a/python/python/tests/test_embeddings.py
+++ b/python/python/tests/test_embeddings.py
@@ -1,6 +1,15 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright The LanceDB Authors
-
+#  Copyright 2023 LanceDB Developers
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
 from typing import List, Union
 from unittest.mock import MagicMock, patch

@@ -9,7 +18,6 @@ import lancedb
 import numpy as np
 import pyarrow as pa
 import pytest
-import pandas as pd
 from lancedb.conftest import MockTextEmbeddingFunction
 from lancedb.embeddings import (
    EmbeddingFunctionConfig,
@@ -121,142 +129,6 @@ def test_embedding_with_bad_results(tmp_path):
    # assert tbl["vector"].null_count == 1


-def test_with_existing_vectors(tmp_path):
-    @register("mock-embedding")
-    class MockEmbeddingFunction(TextEmbeddingFunction):
-        def ndims(self):
-            return 128
-
-        def generate_embeddings(
-            self, texts: Union[List[str], np.ndarray]
-        ) -> List[np.array]:
-            return [np.random.randn(self.ndims()).tolist() for _ in range(len(texts))]
-
-    registry = get_registry()
-    model = registry.get("mock-embedding").create()
-
-    class Schema(LanceModel):
-        text: str = model.SourceField()
-        vector: Vector(model.ndims()) = model.VectorField()
-
-    db = lancedb.connect(tmp_path)
-    tbl = db.create_table("test", schema=Schema, mode="overwrite")
-    tbl.add([{"text": "hello world", "vector": np.zeros(128).tolist()}])
-
-    embeddings = tbl.to_arrow()["vector"].to_pylist()
-    assert not np.any(embeddings), "all zeros"
-
-
-def test_embedding_function_with_pandas(tmp_path):
-    @register("mock-embedding")
-    class _MockEmbeddingFunction(TextEmbeddingFunction):
-        def ndims(self):
-            return 128
-
-        def generate_embeddings(
-            self, texts: Union[List[str], np.ndarray]
-        ) -> List[np.array]:
-            return [np.random.randn(self.ndims()).tolist() for _ in range(len(texts))]
-
-    registery = get_registry()
-    func = registery.get("mock-embedding").create()
-
-    class TestSchema(LanceModel):
-        text: str = func.SourceField()
-        val: int
-        vector: Vector(func.ndims()) = func.VectorField()
-
-    df = pd.DataFrame(
-        {
-            "text": ["hello world", "goodbye world"],
-            "val": [1, 2],
-            "not-used": ["s1", "s3"],
-        }
-    )
-    db = lancedb.connect(tmp_path)
-    tbl = db.create_table("test", schema=TestSchema, mode="overwrite", data=df)
-    schema = tbl.schema
-    assert schema.field("text").type == pa.string()
-    assert schema.field("val").type == pa.int64()
-    assert schema.field("vector").type == pa.list_(pa.float32(), 128)
-
-    df = pd.DataFrame(
-        {
-            "text": ["extra", "more"],
-            "val": [4, 5],
-            "misc-col": ["s1", "s3"],
-        }
-    )
-    tbl.add(df)
-
-    assert tbl.count_rows() == 4
-    embeddings = tbl.to_arrow()["vector"]
-    assert embeddings.null_count == 0
-
-    df = pd.DataFrame(
-        {
-            "text": ["with", "embeddings"],
-            "val": [6, 7],
-            "vector": [np.zeros(128).tolist(), np.zeros(128).tolist()],
-        }
-    )
-    tbl.add(df)
-
-    embeddings = tbl.search().where("val > 5").to_arrow()["vector"].to_pylist()
-    assert not np.any(embeddings), "all zeros"
-
-
-def test_multiple_embeddings_for_pandas(tmp_path):
-    @register("mock-embedding")
-    class MockFunc1(TextEmbeddingFunction):
-        def ndims(self):
-            return 128
-
-        def generate_embeddings(
-            self, texts: Union[List[str], np.ndarray]
-        ) -> List[np.array]:
-            return [np.random.randn(self.ndims()).tolist() for _ in range(len(texts))]
-
-    @register("mock-embedding2")
-    class MockFunc2(TextEmbeddingFunction):
-        def ndims(self):
-            return 512
-
-        def generate_embeddings(
-            self, texts: Union[List[str], np.ndarray]
-        ) -> List[np.array]:
-            return [np.random.randn(self.ndims()).tolist() for _ in range(len(texts))]
-
-    registery = get_registry()
-    func1 = registery.get("mock-embedding").create()
-    func2 = registery.get("mock-embedding2").create()
-
-    class TestSchema(LanceModel):
-        text: str = func1.SourceField()
-        val: int
-        vec1: Vector(func1.ndims()) = func1.VectorField()
-        prompt: str = func2.SourceField()
-        vec2: Vector(func2.ndims()) = func2.VectorField()
-
-    df = pd.DataFrame(
-        {
-            "text": ["hello world", "goodbye world"],
-            "val": [1, 2],
-            "prompt": ["hello", "goodbye"],
-        }
-    )
-    db = lancedb.connect(tmp_path)
-    tbl = db.create_table("test", schema=TestSchema, mode="overwrite", data=df)
-
-    schema = tbl.schema
-    assert schema.field("text").type == pa.string()
-    assert schema.field("val").type == pa.int64()
-    assert schema.field("vec1").type == pa.list_(pa.float32(), 128)
-    assert schema.field("prompt").type == pa.string()
-    assert schema.field("vec2").type == pa.list_(pa.float32(), 512)
-    assert tbl.count_rows() == 2
-
-
@pytest.mark.slow
 def test_embedding_function_rate_limit(tmp_path):
    def _get_schema_from_model(model):
--- a/python/python/tests/test_remote_db.py
+++ b/python/python/tests/test_remote_db.py
@@ -197,23 +197,6 @@ def test_query_sync_minimal():
        assert data == expected


-def test_query_sync_empty_query():
-    def handler(body):
-        assert body == {
-            "k": 10,
-            "filter": "true",
-            "vector": [],
-            "columns": ["id"],
-        }
-
-        return pa.table({"id": [1, 2, 3]})
-
-    with query_test_table(handler) as table:
-        data = table.search(None).where("true").select(["id"]).limit(10).to_list()
-        expected = [{"id": 1}, {"id": 2}, {"id": 3}]
-        assert data == expected
-
-
 def test_query_sync_maximal():
    def handler(body):
        assert body == {
@@ -246,17 +229,6 @@ def test_query_sync_maximal():
        )


-def test_query_sync_multiple_vectors():
-    def handler(_body):
-        return pa.table({"id": [1]})
-
-    with query_test_table(handler) as table:
-        results = table.search([[1, 2, 3], [4, 5, 6]]).limit(1).to_list()
-        assert len(results) == 2
-        results.sort(key=lambda x: x["query_index"])
-        assert results == [{"id": 1, "query_index": 0}, {"id": 1, "query_index": 1}]
-
-
 def test_query_sync_fts():
    def handler(body):
        assert body == {
--- a/python/python/tests/test_table.py
+++ b/python/python/tests/test_table.py
@@ -892,15 +892,10 @@ def test_empty_query(db):
    table = LanceTable.create(db, "my_table2", data=[{"id": i} for i in range(100)])
    df = table.search().select(["id"]).to_pandas()
    assert len(df) == 10
-    # None is the same as default
    df = table.search().select(["id"]).limit(None).to_pandas()
-    assert len(df) == 10
-    # invalid limist is the same as None, wihch is the same as default
+    assert len(df) == 100
    df = table.search().select(["id"]).limit(-1).to_pandas()
-    assert len(df) == 10
-    # valid limit should work
-    df = table.search().select(["id"]).limit(42).to_pandas()
-    assert len(df) == 42
+    assert len(df) == 100


 def test_search_with_schema_inf_single_vector(db):
--- a/python/src/query.rs
+++ b/python/src/query.rs
@@ -142,13 +142,6 @@ impl VectorQuery {
        self.inner = self.inner.clone().only_if(predicate);
    }

-    pub fn add_query_vector(&mut self, vector: Bound<'_, PyAny>) -> PyResult<()> {
-        let data: ArrayData = ArrayData::from_pyarrow_bound(&vector)?;
-        let array = make_array(data);
-        self.inner = self.inner.clone().add_query_vector(array).infer_error()?;
-        Ok(())
-    }
-
    pub fn select(&mut self, columns: Vec<(String, String)>) {
        self.inner = self.inner.clone().select(Select::dynamic(&columns));
    }
--- a/rust/lancedb/src/query.rs
+++ b/rust/lancedb/src/query.rs
@@ -475,7 +475,6 @@ impl<T: HasQuery> QueryBase for T {

 /// Options for controlling the execution of a query
 #[non_exhaustive]
-#[derive(Debug, Clone)]
 pub struct QueryExecutionOptions {
    /// The maximum number of rows that will be contained in a single
    /// `RecordBatch` delivered by the query.
@@ -651,7 +650,7 @@ impl Query {
    pub fn nearest_to(self, vector: impl IntoQueryVector) -> Result<VectorQuery> {
        let mut vector_query = self.into_vector();
        let query_vector = vector.to_query_vector(&DataType::Float32, "default")?;
-        vector_query.query_vector.push(query_vector);
+        vector_query.query_vector = Some(query_vector);
        Ok(vector_query)
    }
 }
@@ -702,7 +701,7 @@ pub struct VectorQuery {
    // the column based on the dataset's schema.
    pub(crate) column: Option<String>,
    // IVF PQ - ANN search.
-    pub(crate) query_vector: Vec<Arc<dyn Array>>,
+    pub(crate) query_vector: Option<Arc<dyn Array>>,
    pub(crate) nprobes: usize,
    pub(crate) refine_factor: Option<u32>,
    pub(crate) distance_type: Option<DistanceType>,
@@ -715,7 +714,7 @@ impl VectorQuery {
        Self {
            base,
            column: None,
-            query_vector: Vec::new(),
+            query_vector: None,
            nprobes: 20,
            refine_factor: None,
            distance_type: None,
@@ -735,22 +734,6 @@ impl VectorQuery {
        self
    }

-    /// Add another query vector to the search.
-    ///
-    /// Multiple searches will be dispatched as part of the query.
-    /// This is a convenience method for adding multiple query vectors
-    /// to the search. It is not expected to be faster than issuing
-    /// multiple queries concurrently.
-    ///
-    /// The output data will contain an additional columns `query_index` which
-    /// will contain the index of the query vector that was used to generate the
-    /// result.
-    pub fn add_query_vector(mut self, vector: impl IntoQueryVector) -> Result<Self> {
-        let query_vector = vector.to_query_vector(&DataType::Float32, "default")?;
-        self.query_vector.push(query_vector);
-        Ok(self)
-    }
-
    /// Set the number of partitions to search (probe)
    ///
    /// This argument is only used when the vector column has an IVF PQ index.
@@ -871,7 +854,6 @@ mod tests {
    use std::sync::Arc;

    use super::*;
-    use arrow::{compute::concat_batches, datatypes::Int32Type};
    use arrow_array::{
        cast::AsArray, Float32Array, Int32Array, RecordBatch, RecordBatchIterator,
        RecordBatchReader,
@@ -901,10 +883,7 @@ mod tests {

        let vector = Float32Array::from_iter_values([0.1, 0.2]);
        let query = table.query().nearest_to(&[0.1, 0.2]).unwrap();
-        assert_eq!(
-            *query.query_vector.first().unwrap().as_ref().as_primitive(),
-            vector
-        );
+        assert_eq!(*query.query_vector.unwrap().as_ref().as_primitive(), vector);

        let new_vector = Float32Array::from_iter_values([9.8, 8.7]);

@@ -920,7 +899,7 @@ mod tests {
            .refine_factor(999);

        assert_eq!(
-            *query.query_vector.first().unwrap().as_ref().as_primitive(),
+            *query.query_vector.unwrap().as_ref().as_primitive(),
            new_vector
        );
        assert_eq!(query.base.limit.unwrap(), 100);
@@ -1218,34 +1197,4 @@ mod tests {
            assert!(batch.column_by_name("_rowid").is_some());
        }
    }
-
-    #[tokio::test]
-    async fn test_multiple_query_vectors() {
-        let tmp_dir = tempdir().unwrap();
-        let table = make_test_table(&tmp_dir).await;
-        let query = table
-            .query()
-            .nearest_to(&[0.1, 0.2, 0.3, 0.4])
-            .unwrap()
-            .add_query_vector(&[0.5, 0.6, 0.7, 0.8])
-            .unwrap()
-            .limit(1);
-
-        let plan = query.explain_plan(true).await.unwrap();
-        assert!(plan.contains("UnionExec"));
-
-        let results = query
-            .execute()
-            .await
-            .unwrap()
-            .try_collect::<Vec<_>>()
-            .await
-            .unwrap();
-        let results = concat_batches(&results[0].schema(), &results).unwrap();
-        assert_eq!(results.num_rows(), 2); // One result for each query vector.
-        let query_index = results["query_index"].as_primitive::<Int32Type>();
-        // We don't guarantee order.
-        assert!(query_index.values().contains(&0));
-        assert!(query_index.values().contains(&1));
-    }
 }
--- a/rust/lancedb/src/remote/table.rs
+++ b/rust/lancedb/src/remote/table.rs
@@ -6,7 +6,7 @@ use crate::index::IndexStatistics;
 use crate::query::Select;
 use crate::table::AddDataMode;
 use crate::utils::{supported_btree_data_type, supported_vector_data_type};
-use crate::{Error, Table};
+use crate::Error;
 use arrow_array::RecordBatchReader;
 use arrow_ipc::reader::FileReader;
 use arrow_schema::{DataType, SchemaRef};
@@ -185,71 +185,6 @@ impl<S: HttpSend> RemoteTable<S> {

        Ok(())
    }
-
-    fn apply_vector_query_params(
-        mut body: serde_json::Value,
-        query: &VectorQuery,
-    ) -> Result<Vec<serde_json::Value>> {
-        Self::apply_query_params(&mut body, &query.base)?;
-
-        // Apply general parameters, before we dispatch based on number of query vectors.
-        body["prefilter"] = query.base.prefilter.into();
-        body["distance_type"] = serde_json::json!(query.distance_type.unwrap_or_default());
-        body["nprobes"] = query.nprobes.into();
-        body["refine_factor"] = query.refine_factor.into();
-        if let Some(vector_column) = query.column.as_ref() {
-            body["vector_column"] = serde_json::Value::String(vector_column.clone());
-        }
-        if !query.use_index {
-            body["bypass_vector_index"] = serde_json::Value::Bool(true);
-        }
-
-        fn vector_to_json(vector: &arrow_array::ArrayRef) -> Result<serde_json::Value> {
-            match vector.data_type() {
-                DataType::Float32 => {
-                    let array = vector
-                        .as_any()
-                        .downcast_ref::<arrow_array::Float32Array>()
-                        .unwrap();
-                    Ok(serde_json::Value::Array(
-                        array
-                            .values()
-                            .iter()
-                            .map(|v| {
-                                serde_json::Value::Number(
-                                    serde_json::Number::from_f64(*v as f64).unwrap(),
-                                )
-                            })
-                            .collect(),
-                    ))
-                }
-                _ => Err(Error::InvalidInput {
-                    message: "VectorQuery vector must be of type Float32".into(),
-                }),
-            }
-        }
-
-        match query.query_vector.len() {
-            0 => {
-                // Server takes empty vector, not null or undefined.
-                body["vector"] = serde_json::Value::Array(Vec::new());
-                Ok(vec![body])
-            }
-            1 => {
-                body["vector"] = vector_to_json(&query.query_vector[0])?;
-                Ok(vec![body])
-            }
-            _ => {
-                let mut bodies = Vec::with_capacity(query.query_vector.len());
-                for vector in &query.query_vector {
-                    let mut body = body.clone();
-                    body["vector"] = vector_to_json(vector)?;
-                    bodies.push(body);
-                }
-                Ok(bodies)
-            }
-        }
-    }
 }

 #[derive(Deserialize)]
@@ -371,29 +306,51 @@ impl<S: HttpSend> TableInternal for RemoteTable<S> {
    ) -> Result<Arc<dyn ExecutionPlan>> {
        let request = self.client.post(&format!("/v1/table/{}/query/", self.name));

-        let body = serde_json::Value::Object(Default::default());
-        let bodies = Self::apply_vector_query_params(body, query)?;
+        let mut body = serde_json::Value::Object(Default::default());
+        Self::apply_query_params(&mut body, &query.base)?;

-        let mut futures = Vec::with_capacity(bodies.len());
-        for body in bodies {
-            let request = request.try_clone().unwrap().json(&body);
-            let future = async move {
-                let (request_id, response) = self.client.send(request, true).await?;
-                self.read_arrow_stream(&request_id, response).await
-            };
-            futures.push(future);
-        }
-        let streams = futures::future::try_join_all(futures).await?;
-        if streams.len() == 1 {
-            let stream = streams.into_iter().next().unwrap();
-            Ok(Arc::new(OneShotExec::new(stream)))
+        body["prefilter"] = query.base.prefilter.into();
+        body["distance_type"] = serde_json::json!(query.distance_type.unwrap_or_default());
+        body["nprobes"] = query.nprobes.into();
+        body["refine_factor"] = query.refine_factor.into();
+
+        let vector: Vec<f32> = if let Some(vector) = query.query_vector.as_ref() {
+            match vector.data_type() {
+                DataType::Float32 => vector
+                    .as_any()
+                    .downcast_ref::<arrow_array::Float32Array>()
+                    .unwrap()
+                    .values()
+                    .iter()
+                    .cloned()
+                    .collect(),
+                _ => {
+                    return Err(Error::InvalidInput {
+                        message: "VectorQuery vector must be of type Float32".into(),
+                    })
+                }
+            }
        } else {
-            let stream_execs = streams
-                .into_iter()
-                .map(|stream| Arc::new(OneShotExec::new(stream)) as Arc<dyn ExecutionPlan>)
-                .collect();
-            Table::multi_vector_plan(stream_execs)
+            // Server takes empty vector, not null or undefined.
+            Vec::new()
+        };
+        body["vector"] = serde_json::json!(vector);
+
+        if let Some(vector_column) = query.column.as_ref() {
+            body["vector_column"] = serde_json::Value::String(vector_column.clone());
        }
+
+        if !query.use_index {
+            body["bypass_vector_index"] = serde_json::Value::Bool(true);
+        }
+
+        let request = request.json(&body);
+
+        let (request_id, response) = self.client.send(request, true).await?;
+
+        let stream = self.read_arrow_stream(&request_id, response).await?;
+
+        Ok(Arc::new(OneShotExec::new(stream)))
    }

    async fn plain_query(
@@ -698,7 +655,6 @@ mod tests {

    use super::*;

-    use arrow::{array::AsArray, compute::concat_batches, datatypes::Int32Type};
    use arrow_array::{Int32Array, RecordBatch, RecordBatchIterator};
    use arrow_schema::{DataType, Field, Schema};
    use futures::{future::BoxFuture, StreamExt, TryFutureExt};
@@ -1251,52 +1207,6 @@ mod tests {
            .unwrap();
    }

-    #[tokio::test]
-    async fn test_query_multiple_vectors() {
-        let table = Table::new_with_handler("my_table", |request| {
-            assert_eq!(request.method(), "POST");
-            assert_eq!(request.url().path(), "/v1/table/my_table/query/");
-            assert_eq!(
-                request.headers().get("Content-Type").unwrap(),
-                JSON_CONTENT_TYPE
-            );
-            let data = RecordBatch::try_new(
-                Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, false)])),
-                vec![Arc::new(Int32Array::from(vec![1, 2, 3]))],
-            )
-            .unwrap();
-            let response_body = write_ipc_file(&data);
-            http::Response::builder()
-                .status(200)
-                .header(CONTENT_TYPE, ARROW_FILE_CONTENT_TYPE)
-                .body(response_body)
-                .unwrap()
-        });
-
-        let query = table
-            .query()
-            .nearest_to(vec![0.1, 0.2, 0.3])
-            .unwrap()
-            .add_query_vector(vec![0.4, 0.5, 0.6])
-            .unwrap();
-        let plan = query.explain_plan(true).await.unwrap();
-        assert!(plan.contains("UnionExec"), "Plan: {}", plan);
-
-        let results = query
-            .execute()
-            .await
-            .unwrap()
-            .try_collect::<Vec<_>>()
-            .await
-            .unwrap();
-        let results = concat_batches(&results[0].schema(), &results).unwrap();
-
-        let query_index = results["query_index"].as_primitive::<Int32Type>();
-        // We don't guarantee order.
-        assert!(query_index.values().contains(&0));
-        assert!(query_index.values().contains(&1));
-    }
-
    #[tokio::test]
    async fn test_create_index() {
        let cases = [
--- a/rust/lancedb/src/table.rs
+++ b/rust/lancedb/src/table.rs
@@ -24,9 +24,6 @@ use arrow_array::{RecordBatchIterator, RecordBatchReader};
 use arrow_schema::{Field, Schema, SchemaRef};
 use async_trait::async_trait;
 use datafusion_physical_plan::display::DisplayableExecutionPlan;
-use datafusion_physical_plan::projection::ProjectionExec;
-use datafusion_physical_plan::repartition::RepartitionExec;
-use datafusion_physical_plan::union::UnionExec;
 use datafusion_physical_plan::ExecutionPlan;
 use futures::{StreamExt, TryStreamExt};
 use lance::dataset::builder::DatasetBuilder;
@@ -975,57 +972,6 @@ impl Table {
    ) -> Result<Option<IndexStatistics>> {
        self.inner.index_stats(index_name.as_ref()).await
    }
-
-    // Take many execution plans and map them into a single plan that adds
-    // a query_index column and unions them.
-    pub(crate) fn multi_vector_plan(
-        plans: Vec<Arc<dyn ExecutionPlan>>,
-    ) -> Result<Arc<dyn ExecutionPlan>> {
-        if plans.is_empty() {
-            return Err(Error::InvalidInput {
-                message: "No plans provided".to_string(),
-            });
-        }
-        // Projection to keeping all existing columns
-        let first_plan = plans[0].clone();
-        let project_all_columns = first_plan
-            .schema()
-            .fields()
-            .iter()
-            .enumerate()
-            .map(|(i, field)| {
-                let expr =
-                    datafusion_physical_plan::expressions::Column::new(field.name().as_str(), i);
-                let expr = Arc::new(expr) as Arc<dyn datafusion_physical_plan::PhysicalExpr>;
-                (expr, field.name().clone())
-            })
-            .collect::<Vec<_>>();
-
-        let projected_plans = plans
-            .into_iter()
-            .enumerate()
-            .map(|(plan_i, plan)| {
-                let query_index = datafusion_common::ScalarValue::Int32(Some(plan_i as i32));
-                let query_index_expr =
-                    datafusion_physical_plan::expressions::Literal::new(query_index);
-                let query_index_expr =
-                    Arc::new(query_index_expr) as Arc<dyn datafusion_physical_plan::PhysicalExpr>;
-                let mut projections = vec![(query_index_expr, "query_index".to_string())];
-                projections.extend_from_slice(&project_all_columns);
-                let projection = ProjectionExec::try_new(projections, plan).unwrap();
-                Arc::new(projection) as Arc<dyn datafusion_physical_plan::ExecutionPlan>
-            })
-            .collect::<Vec<_>>();
-
-        let unioned = Arc::new(UnionExec::new(projected_plans));
-        // We require 1 partition in the final output
-        let repartitioned = RepartitionExec::try_new(
-            unioned,
-            datafusion_physical_plan::Partitioning::RoundRobinBatch(1),
-        )
-        .unwrap();
-        Ok(Arc::new(repartitioned))
-    }
 }

 impl From<NativeTable> for Table {
@@ -1838,25 +1784,9 @@ impl TableInternal for NativeTable {
    ) -> Result<Arc<dyn ExecutionPlan>> {
        let ds_ref = self.dataset.get().await?;

-        if query.query_vector.len() > 1 {
-            // If there are multiple query vectors, create a plan for each of them and union them.
-            let query_vecs = query.query_vector.clone();
-            let plan_futures = query_vecs
-                .into_iter()
-                .map(|query_vector| {
-                    let mut sub_query = query.clone();
-                    sub_query.query_vector = vec![query_vector];
-                    let options_ref = options.clone();
-                    async move { self.create_plan(&sub_query, options_ref).await }
-                })
-                .collect::<Vec<_>>();
-            let plans = futures::future::try_join_all(plan_futures).await?;
-            return Table::multi_vector_plan(plans);
-        }
-
        let mut scanner: Scanner = ds_ref.scan();

-        if let Some(query_vector) = query.query_vector.first() {
+        if let Some(query_vector) = query.query_vector.as_ref() {
            // If there is a vector query, default to limit=10 if unspecified
            let column = if let Some(col) = query.column.as_ref() {
                col.clone()
@@ -1898,11 +1828,18 @@ impl TableInternal for NativeTable {
                query_vector,
                query.base.limit.unwrap_or(DEFAULT_TOP_K),
            )?;
+            scanner.limit(
+                query.base.limit.map(|limit| limit as i64),
+                query.base.offset.map(|offset| offset as i64),
+            )?;
+        } else {
+            // If there is no vector query, it's ok to not have a limit
+            scanner.limit(
+                query.base.limit.map(|limit| limit as i64),
+                query.base.offset.map(|offset| offset as i64),
+            )?;
        }
-        scanner.limit(
-            query.base.limit.map(|limit| limit as i64),
-            query.base.offset.map(|offset| offset as i64),
-        )?;
+
        scanner.nprobs(query.nprobes);
        scanner.use_index(query.use_index);
        scanner.prefilter(query.base.prefilter);
Author	SHA1	Message	Date
BubbleCal	f69b673c1e	Merge branch 'main' of https://github.com/lancedb/lancedb into yang/relative-lance-dep	2024-11-11 17:36:06 +08:00
BubbleCal	4c6b728a31	feat: support FTS options on RemoteTable Signed-off-by: BubbleCal <bubble-cal@outlook.com>	2024-11-08 18:49:13 +08:00
BubbleCal	138a12a427	Merge branch 'main' of https://github.com/lancedb/lancedb into yang/relative-lance-dep	2024-11-08 18:49:09 +08:00
BubbleCal	0c108407ab	bump version Signed-off-by: BubbleCal <bubble-cal@outlook.com>	2024-11-01 15:17:25 +08:00
BubbleCal	a7fead3801	Merge branch 'main' of https://github.com/lancedb/lancedb into yang/relative-lance-dep	2024-11-01 15:15:23 +08:00
BubbleCal	50c68feae9	Merge branch 'main' of https://github.com/lancedb/lancedb into yang/relative-lance-dep	2024-09-29 15:05:56 +08:00
BubbleCal	f30c5b24fa	fix Signed-off-by: BubbleCal <bubble-cal@outlook.com>	2024-09-27 17:58:35 +08:00
BubbleCal	2a477ad387	Merge branch 'main' of https://github.com/lancedb/lancedb into yang/relative-lance-dep	2024-09-27 17:00:31 +08:00
BubbleCal	0b29aca23b	Merge branch 'main' of https://github.com/lancedb/lancedb into yang/relative-lance-dep	2024-09-09 08:09:13 +08:00
BubbleCal	df62c3d9ac	Merge branch 'main' of https://github.com/lancedb/lancedb into yang/relative-lance-dep	2024-09-04 16:45:46 +08:00
BubbleCal	aef4656053	feat: use relative lance Signed-off-by: BubbleCal <bubble-cal@outlook.com>	2024-08-13 16:24:34 +08:00