Compare commits


12 Commits

Author SHA1 Message Date
ayush chaurasia
99d1a06a44 add dataset features 2024-04-05 16:34:21 +05:30
Ayush Chaurasia
f23641d703 feat(python): Embedding API fine tuning support (#1125)
# based on https://github.com/lancedb/lancedb/pull/1023
Very WIP. I'm thinking of merging individual pieces in this feature
branch instead of main so we can still review code in pieces and avoid
polluting main.

- Adds support for creating a corpus from a llama-index text-node object
(aim to remove this dependency)
- Adds very basic support for an LLM API for chat completion; will expand
as needs arise.
- Adds a basic universal evaluator
- Adds sentence-transformers fine-tuning support

Known problems:

 - [ ] W&B experiment tracking is not working for sentence transformers
2024-03-26 20:19:15 +05:30
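
A rough sketch of the fine-tuning flow this feature introduces, pieced together from the example scripts added later in this diff (the commit calls this very WIP, so exact signatures may differ):

```python
import pandas as pd

from lancedb.embeddings import get_registry
from lancedb.embeddings.fine_tuner.dataset import QADataset

# Load QA datasets previously generated from text chunks
# (see the example scripts added later in this diff).
trainset = QADataset.load("qa_dataset_uber")
valset = QADataset.load("valset")

# Fine-tune a sentence-transformers model, then evaluate the tuned checkpoint.
model = get_registry().get("sentence-transformers").create()
model.finetune(trainset=trainset, valset=valset, path="model_finetuned", epochs=4)

tuned = get_registry().get("sentence-transformers").create(name="./model_finetuned")
print("tuned hit-rate -", pd.DataFrame(tuned.evaluate(valset))["is_hit"].mean())
```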
Ayush Chaurasia
e9e0a37ca8 docs: Add all available HF/sentence transformers embedding models list (#1134)
Solves https://github.com/lancedb/lancedb/issues/968
2024-03-26 19:04:09 +05:30
Weston Pace
c37a28abbd docs: add the async python API to the docs (#1156) 2024-03-26 07:54:16 -05:00
Lance Release
98c1e635b3 Updating package-lock.json 2024-03-25 20:38:37 +00:00
Lance Release
9992b927fd Updating package-lock.json 2024-03-25 15:43:00 +00:00
Lance Release
80d501011c Bump version: 0.4.13 → 0.4.14 2024-03-25 15:42:49 +00:00
Weston Pace
6e3a9d08e0 feat: add publish step for nodejs (#1155)
This will start publishing `@lancedb/lancedb` with the new nodejs
package on our releases.
2024-03-25 11:23:30 -04:00
Pranav Maddi
268d8e057b Adds an Ask LanceDB button to docs. (#1150)
This links out to the new [asklancedb.com](https://asklancedb.com) page.

Screenshots of the change:

![Quick start - LanceDB · 10 20am · 03-22](https://github.com/lancedb/lancedb/assets/2371511/c45ba893-fc74-4957-bdd3-3712b351aff3)
![Quick start - LanceDB](https://github.com/lancedb/lancedb/assets/2371511/d4762eb6-52af-4fd5-857e-3ed280716999)
2024-03-23 01:09:44 +05:30
Bert
dfc518b8fb Node SDK Client middleware for HTTP Requests (#1130)
Adds client-side middleware to the LanceDB Node SDK to instrument HTTP
requests.

Example: adding an `x-request-id` request header:
```js
class HttpMiddleware {
    constructor({ requestId }) {
        this.requestId = requestId
    }

    onRemoteRequest(req, next) {
        req.headers['x-request-id'] = this.requestId
        return next(req)
    }
}

const db = await lancedb.connect({
  uri: 'db://remote-123',
  apiKey: 'sk_...',
})

let tables = await db.withMiddleware(new HttpMiddleware({ requestId: '123' })).tableNames();

```

---------

Co-authored-by: Weston Pace <weston.pace@gmail.com>
2024-03-22 11:58:05 -04:00
QianZhu
98acf34ae8 remove warnings (#1147) 2024-03-21 14:49:01 -07:00
Lei Xu
25988d23cd chore: validate table name (#1146)
Closes #1129
2024-03-21 14:46:13 -07:00
78 changed files with 3033 additions and 1170 deletions


@@ -1,5 +1,5 @@
[bumpversion]
current_version = 0.4.13
current_version = 0.4.14
commit = True
message = Bump version: {current_version} → {new_version}
tag = True


@@ -18,7 +18,7 @@ on:
env:
# Disable full debug symbol generation to speed up CI build and keep memory down
# "1" means line tables only, which is useful for panic tracebacks.
RUSTFLAGS: "-C debuginfo=1 -C target-cpu=native -C target-feature=+f16c,+avx2,+fma"
RUSTFLAGS: "-C debuginfo=1 -C target-cpu=haswell -C target-feature=+f16c,+avx2,+fma"
RUST_BACKTRACE: "1"
jobs:
@@ -28,6 +28,8 @@ jobs:
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Print CPU capabilities
run: cat /proc/cpuinfo
- name: Install dependencies needed for ubuntu
run: |
sudo apt install -y protobuf-compiler libssl-dev
@@ -39,7 +41,7 @@ jobs:
cache: "pip"
cache-dependency-path: "docs/test/requirements.txt"
- name: Rust cache
uses: swatinem/rust-cache@v2
- name: Build Python
working-directory: docs/test
run:
@@ -64,6 +66,8 @@ jobs:
with:
fetch-depth: 0
lfs: true
- name: Print CPU capabilities
run: cat /proc/cpuinfo
- name: Set up Node
uses: actions/setup-node@v4
with:


@@ -20,7 +20,8 @@ env:
# "1" means line tables only, which is useful for panic tracebacks.
#
# Use native CPU to accelerate tests if possible, especially for f16
RUSTFLAGS: "-C debuginfo=1 -C target-cpu=native -C target-feature=+f16c,+avx2,+fma"
# target-cpu=haswell fixes failing ci build
RUSTFLAGS: "-C debuginfo=1 -C target-cpu=haswell -C target-feature=+f16c,+avx2,+fma"
RUST_BACKTRACE: "1"
jobs:


@@ -2,7 +2,7 @@ name: NPM Publish
on:
release:
types: [ published ]
types: [published]
jobs:
node:
@@ -19,7 +19,7 @@ jobs:
- uses: actions/setup-node@v3
with:
node-version: 20
cache: 'npm'
cache: "npm"
cache-dependency-path: node/package-lock.json
- name: Install dependencies
run: |
@@ -31,7 +31,7 @@ jobs:
npm run tsc
npm pack
- name: Upload Linux Artifacts
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4
with:
name: node-package
path: |
@@ -61,12 +61,41 @@ jobs:
- name: Build MacOS native node modules
run: bash ci/build_macos_artifacts.sh ${{ matrix.config.arch }}
- name: Upload Darwin Artifacts
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4
with:
name: native-darwin
name: node-native-darwin
path: |
node/dist/lancedb-vectordb-darwin*.tgz
nodejs-macos:
strategy:
matrix:
config:
- arch: x86_64-apple-darwin
runner: macos-13
- arch: aarch64-apple-darwin
# xlarge is implicitly arm64.
runner: macos-14
runs-on: ${{ matrix.config.runner }}
# Only runs on tags that match the make-release action
if: startsWith(github.ref, 'refs/tags/v')
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Install system dependencies
run: brew install protobuf
- name: Install npm dependencies
run: |
cd nodejs
npm ci
- name: Build MacOS native nodejs modules
run: bash ci/build_macos_artifacts_nodejs.sh ${{ matrix.config.arch }}
- name: Upload Darwin Artifacts
uses: actions/upload-artifact@v4
with:
name: nodejs-native-darwin-${{ matrix.config.arch }}
path: |
nodejs/dist/*.node
node-linux:
name: node-linux (${{ matrix.config.arch}}-unknown-linux-gnu
@@ -103,12 +132,63 @@ jobs:
run: |
bash ci/build_linux_artifacts.sh ${{ matrix.config.arch }}
- name: Upload Linux Artifacts
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4
with:
name: native-linux
name: node-native-linux
path: |
node/dist/lancedb-vectordb-linux*.tgz
nodejs-linux:
name: nodejs-linux (${{ matrix.config.arch}}-unknown-linux-gnu
runs-on: ${{ matrix.config.runner }}
# Only runs on tags that match the make-release action
if: startsWith(github.ref, 'refs/tags/v')
strategy:
fail-fast: false
matrix:
config:
- arch: x86_64
runner: ubuntu-latest
- arch: aarch64
# For successful fat LTO builds, we need a large runner to avoid OOM errors.
runner: buildjet-16vcpu-ubuntu-2204-arm
steps:
- name: Checkout
uses: actions/checkout@v4
# Buildjet aarch64 runners have only 1.5 GB RAM per core, vs 3.5 GB per core for
# x86_64 runners. To avoid OOM errors on ARM, we create a swap file.
- name: Configure aarch64 build
if: ${{ matrix.config.arch == 'aarch64' }}
run: |
free -h
sudo fallocate -l 16G /swapfile
sudo chmod 600 /swapfile
sudo mkswap /swapfile
sudo swapon /swapfile
echo "/swapfile swap swap defaults 0 0" >> sudo /etc/fstab
# print info
swapon --show
free -h
- name: Build Linux Artifacts
run: |
bash ci/build_linux_artifacts_nodejs.sh ${{ matrix.config.arch }}
- name: Upload Linux Artifacts
uses: actions/upload-artifact@v4
with:
name: nodejs-native-linux-${{ matrix.config.arch }}
path: |
nodejs/dist/*.node
# The generic files are the same in all distros so we just pick
# one to do the upload.
- name: Upload Generic Artifacts
if: ${{ matrix.config.arch == 'x86_64' }}
uses: actions/upload-artifact@v4
with:
name: nodejs-dist
path: |
nodejs/dist/*
!nodejs/dist/*.node
node-windows:
runs-on: windows-2022
# Only runs on tags that match the make-release action
@@ -136,25 +216,60 @@ jobs:
- name: Build Windows native node modules
run: .\ci\build_windows_artifacts.ps1 ${{ matrix.target }}
- name: Upload Windows Artifacts
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4
with:
name: native-windows
name: node-native-windows
path: |
node/dist/lancedb-vectordb-win32*.tgz
nodejs-windows:
runs-on: windows-2022
# Only runs on tags that match the make-release action
if: startsWith(github.ref, 'refs/tags/v')
strategy:
fail-fast: false
matrix:
target: [x86_64-pc-windows-msvc]
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Install Protoc v21.12
working-directory: C:\
run: |
New-Item -Path 'C:\protoc' -ItemType Directory
Set-Location C:\protoc
Invoke-WebRequest https://github.com/protocolbuffers/protobuf/releases/download/v21.12/protoc-21.12-win64.zip -OutFile C:\protoc\protoc.zip
7z x protoc.zip
Add-Content $env:GITHUB_PATH "C:\protoc\bin"
shell: powershell
- name: Install npm dependencies
run: |
cd nodejs
npm ci
- name: Build Windows native node modules
run: .\ci\build_windows_artifacts_nodejs.ps1 ${{ matrix.target }}
- name: Upload Windows Artifacts
uses: actions/upload-artifact@v4
with:
name: nodejs-native-windows
path: |
nodejs/dist/*.node
release:
needs: [node, node-macos, node-linux, node-windows]
runs-on: ubuntu-latest
# Only runs on tags that match the make-release action
if: startsWith(github.ref, 'refs/tags/v')
steps:
- uses: actions/download-artifact@v3
- uses: actions/download-artifact@v4
with:
pattern: node-*
- name: Display structure of downloaded files
run: ls -R
- uses: actions/setup-node@v3
with:
node-version: 20
registry-url: 'https://registry.npmjs.org'
registry-url: "https://registry.npmjs.org"
- name: Publish to NPM
env:
NODE_AUTH_TOKEN: ${{ secrets.LANCEDB_NPM_REGISTRY_TOKEN }}
@@ -164,6 +279,45 @@ jobs:
npm publish $filename
done
release-nodejs:
needs: [nodejs-macos, nodejs-linux, nodejs-windows]
runs-on: ubuntu-latest
# Only runs on tags that match the make-release action
if: startsWith(github.ref, 'refs/tags/v')
defaults:
run:
shell: bash
working-directory: nodejs
steps:
- name: Checkout
uses: actions/checkout@v4
- uses: actions/download-artifact@v4
with:
name: nodejs-dist
path: nodejs/dist
- uses: actions/download-artifact@v4
name: Download arch-specific binaries
with:
pattern: nodejs-*
path: nodejs/nodejs-artifacts
merge-multiple: true
- name: Display structure of downloaded files
run: find .
- uses: actions/setup-node@v3
with:
node-version: 20
registry-url: "https://registry.npmjs.org"
- name: Install napi-rs
run: npm install -g @napi-rs/cli
- name: Prepare artifacts
run: npx napi artifacts -d nodejs-artifacts
- name: Display structure of staged files
run: find npm
- name: Publish to NPM
env:
NODE_AUTH_TOKEN: ${{ secrets.LANCEDB_NPM_REGISTRY_TOKEN }}
run: npm publish --access public
update-package-lock:
needs: [release]
runs-on: ubuntu-latest
@@ -178,3 +332,18 @@ jobs:
- uses: ./.github/workflows/update_package_lock
with:
github_token: ${{ secrets.LANCEDB_RELEASE_TOKEN }}
update-package-lock-nodejs:
needs: [release-nodejs]
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v4
with:
ref: main
persist-credentials: false
fetch-depth: 0
lfs: true
- uses: ./.github/workflows/update_package_lock_nodejs
with:
github_token: ${{ secrets.LANCEDB_RELEASE_TOKEN }}


@@ -0,0 +1,33 @@
name: update_package_lock_nodejs
description: "Update nodejs's package.lock"
inputs:
github_token:
required: true
description: "github token for the repo"
runs:
using: "composite"
steps:
- uses: actions/setup-node@v3
with:
node-version: 20
- name: Set git configs
shell: bash
run: |
git config user.name 'Lance Release'
git config user.email 'lance-dev@lancedb.com'
- name: Update package-lock.json file
working-directory: ./nodejs
run: |
npm install
git add package-lock.json
git commit -m "Updating package-lock.json"
shell: bash
- name: Push changes
if: ${{ inputs.dry_run }} == "false"
uses: ad-m/github-push-action@master
with:
github_token: ${{ inputs.github_token }}
branch: main
tags: true


@@ -0,0 +1,19 @@
name: Update NodeJs package-lock.json
on:
workflow_dispatch:
jobs:
publish:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v4
with:
ref: main
persist-credentials: false
fetch-depth: 0
lfs: true
- uses: ./.github/workflows/update_package_lock_nodejs
with:
github_token: ${{ secrets.LANCEDB_RELEASE_TOKEN }}

.gitignore

@@ -34,6 +34,7 @@ python/dist
node/dist
node/examples/**/package-lock.json
node/examples/**/dist
nodejs/lancedb/native*
dist
## Rust


@@ -39,3 +39,5 @@ pin-project = "1.0.7"
snafu = "0.7.4"
url = "2"
num-traits = "0.2"
regex = "1.10"
lazy_static = "1"


@@ -0,0 +1,21 @@
#!/bin/bash
set -e
ARCH=${1:-x86_64}
# We pass down the current user so that when we later mount the local files
# into the container, the files are accessible by the current user.
pushd ci/manylinux_nodejs
docker build \
-t lancedb-nodejs-manylinux \
--build-arg="ARCH=$ARCH" \
--build-arg="DOCKER_USER=$(id -u)" \
--progress=plain \
.
popd
# We turn on memory swap to avoid OOM killer
docker run \
-v $(pwd):/io -w /io \
--memory-swap=-1 \
lancedb-nodejs-manylinux \
bash ci/manylinux_nodejs/build.sh $ARCH


@@ -0,0 +1,34 @@
# Builds the macOS artifacts (nodejs binaries).
# Usage: ./ci/build_macos_artifacts_nodejs.sh [target]
# Targets supported: x86_64-apple-darwin aarch64-apple-darwin
set -e
prebuild_rust() {
# Building here for the sake of easier debugging.
pushd rust/lancedb
echo "Building rust library for $1"
export RUST_BACKTRACE=1
cargo build --release --target $1
popd
}
build_node_binaries() {
pushd nodejs
echo "Building nodejs library for $1"
export RUST_TARGET=$1
npm run build-release
popd
}
if [ -n "$1" ]; then
targets=$1
else
targets="x86_64-apple-darwin aarch64-apple-darwin"
fi
echo "Building artifacts for targets: $targets"
for target in $targets
do
prebuild_rust $target
build_node_binaries $target
done


@@ -0,0 +1,41 @@
# Builds the Windows artifacts (nodejs binaries).
# Usage: .\ci\build_windows_artifacts_nodejs.ps1 [target]
# Targets supported:
# - x86_64-pc-windows-msvc
# - i686-pc-windows-msvc
function Prebuild-Rust {
param (
[string]$target
)
# Building here for the sake of easier debugging.
Push-Location -Path "rust/lancedb"
Write-Host "Building rust library for $target"
$env:RUST_BACKTRACE=1
cargo build --release --target $target
Pop-Location
}
function Build-NodeBinaries {
param (
[string]$target
)
Push-Location -Path "nodejs"
Write-Host "Building nodejs library for $target"
$env:RUST_TARGET=$target
npm run build-release
Pop-Location
}
$targets = $args[0]
if (-not $targets) {
$targets = "x86_64-pc-windows-msvc"
}
Write-Host "Building artifacts for targets: $targets"
foreach ($target in $targets) {
Prebuild-Rust $target
Build-NodeBinaries $target
}


@@ -0,0 +1,31 @@
# Manylinux dockerfile with Rust, Node, and Lance dependencies installed.
# This container allows building the node modules native libraries in an
# environment with a very old glibc, so that we are compatible with a wide
# range of linux distributions.
ARG ARCH=x86_64
FROM quay.io/pypa/manylinux2014_${ARCH}
ARG ARCH=x86_64
ARG DOCKER_USER=default_user
# Install static openssl
COPY install_openssl.sh install_openssl.sh
RUN ./install_openssl.sh ${ARCH} > /dev/null
# Protobuf is also installed as root.
COPY install_protobuf.sh install_protobuf.sh
RUN ./install_protobuf.sh ${ARCH}
ENV DOCKER_USER=${DOCKER_USER}
# Create a group and user
RUN echo ${ARCH} && adduser --user-group --create-home --uid ${DOCKER_USER} build_user
# We switch to the user to install Rust and Node, since those like to be
# installed at the user level.
USER ${DOCKER_USER}
COPY prepare_manylinux_node.sh prepare_manylinux_node.sh
RUN cp /prepare_manylinux_node.sh $HOME/ && \
cd $HOME && \
./prepare_manylinux_node.sh ${ARCH}

ci/manylinux_nodejs/build.sh

@@ -0,0 +1,18 @@
#!/bin/bash
# Builds the nodejs module for manylinux. Invoked by ci/build_linux_artifacts_nodejs.sh.
set -e
ARCH=${1:-x86_64}
if [ "$ARCH" = "x86_64" ]; then
export OPENSSL_LIB_DIR=/usr/local/lib64/
else
export OPENSSL_LIB_DIR=/usr/local/lib/
fi
export OPENSSL_STATIC=1
export OPENSSL_INCLUDE_DIR=/usr/local/include/openssl
source $HOME/.bashrc
cd nodejs
npm ci
npm run build-release


@@ -0,0 +1,26 @@
#!/bin/bash
# Builds openssl from source so we can statically link to it
# this is to avoid the error we get with the system installation:
# /usr/bin/ld: <library>: version node not found for symbol SSLeay@@OPENSSL_1.0.1
# /usr/bin/ld: failed to set dynamic section sizes: Bad value
set -e
git clone -b OpenSSL_1_1_1u \
--single-branch \
https://github.com/openssl/openssl.git
pushd openssl
if [[ $1 == x86_64* ]]; then
ARCH=linux-x86_64
else
# gnu target
ARCH=linux-aarch64
fi
./Configure no-shared $ARCH
make
make install


@@ -0,0 +1,15 @@
#!/bin/bash
# Installs protobuf compiler. Should be run as root.
set -e
if [[ $1 == x86_64* ]]; then
ARCH=x86_64
else
# gnu target
ARCH=aarch_64
fi
PB_REL=https://github.com/protocolbuffers/protobuf/releases
PB_VERSION=23.1
curl -LO $PB_REL/download/v$PB_VERSION/protoc-$PB_VERSION-linux-$ARCH.zip
unzip protoc-$PB_VERSION-linux-$ARCH.zip -d /usr/local


@@ -0,0 +1,21 @@
#!/bin/bash
set -e
install_node() {
echo "Installing node..."
curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.34.0/install.sh | bash
source "$HOME"/.bashrc
nvm install --no-progress 16
}
install_rust() {
echo "Installing rust..."
curl https://sh.rustup.rs -sSf | bash -s -- -y
export PATH="$PATH:/root/.cargo/bin"
}
install_node
install_rust


@@ -38,178 +38,180 @@ theme:
custom_dir: overrides
plugins:
- search
- autorefs
- mkdocstrings:
handlers:
python:
paths: [../python]
options:
docstring_style: numpy
heading_level: 4
show_source: true
show_symbol_type_in_heading: true
show_signature_annotations: true
members_order: source
import:
# for cross references
- https://arrow.apache.org/docs/objects.inv
- https://pandas.pydata.org/docs/objects.inv
- mkdocs-jupyter
- ultralytics:
verbose: True
enabled: True
default_image: "assets/lancedb_and_lance.png" # Default image for all pages
add_image: True # Automatically add meta image
add_keywords: True # Add page keywords in the header tag
add_share_buttons: True # Add social share buttons
add_authors: False # Display page authors
add_desc: False
add_dates: False
- search
- autorefs
- mkdocstrings:
handlers:
python:
paths: [../python]
options:
docstring_style: numpy
heading_level: 3
show_source: true
show_symbol_type_in_heading: true
show_signature_annotations: true
show_root_heading: true
members_order: source
import:
# for cross references
- https://arrow.apache.org/docs/objects.inv
- https://pandas.pydata.org/docs/objects.inv
- mkdocs-jupyter
- ultralytics:
verbose: True
enabled: True
default_image: "assets/lancedb_and_lance.png" # Default image for all pages
add_image: True # Automatically add meta image
add_keywords: True # Add page keywords in the header tag
add_share_buttons: True # Add social share buttons
add_authors: False # Display page authors
add_desc: False
add_dates: False
markdown_extensions:
- admonition
- footnotes
- pymdownx.details
- pymdownx.highlight:
anchor_linenums: true
line_spans: __span
pygments_lang_class: true
- pymdownx.inlinehilite
- pymdownx.snippets:
base_path: ..
dedent_subsections: true
- pymdownx.superfences
- pymdownx.tabbed:
alternate_style: true
- md_in_html
- attr_list
- admonition
- footnotes
- pymdownx.details
- pymdownx.highlight:
anchor_linenums: true
line_spans: __span
pygments_lang_class: true
- pymdownx.inlinehilite
- pymdownx.snippets:
base_path: ..
dedent_subsections: true
- pymdownx.superfences
- pymdownx.tabbed:
alternate_style: true
- md_in_html
- attr_list
nav:
- Home:
- LanceDB: index.md
- 🏃🏼‍♂️ Quick start: basic.md
- 📚 Concepts:
- Vector search: concepts/vector_search.md
- Indexing: concepts/index_ivfpq.md
- Storage: concepts/storage.md
- Data management: concepts/data_management.md
- 🔨 Guides:
- Working with tables: guides/tables.md
- Building an ANN index: ann_indexes.md
- Vector Search: search.md
- Full-text search: fts.md
- Hybrid search:
- Overview: hybrid_search/hybrid_search.md
- Comparing Rerankers: hybrid_search/eval.md
- Airbnb financial data example: notebooks/hybrid_search.ipynb
- Filtering: sql.md
- Versioning & Reproducibility: notebooks/reproducibility.ipynb
- Configuring Storage: guides/storage.md
- 🧬 Managing embeddings:
- Overview: embeddings/index.md
- Embedding functions: embeddings/embedding_functions.md
- Available models: embeddings/default_embedding_functions.md
- User-defined embedding functions: embeddings/custom_embedding_function.md
- "Example: Multi-lingual semantic search": notebooks/multi_lingual_example.ipynb
- "Example: MultiModal CLIP Embeddings": notebooks/DisappearingEmbeddingFunction.ipynb
- 🔌 Integrations:
- Tools and data formats: integrations/index.md
- Pandas and PyArrow: python/pandas_and_pyarrow.md
- Polars: python/polars_arrow.md
- DuckDB: python/duckdb.md
- LangChain 🔗: https://python.langchain.com/en/latest/modules/indexes/vectorstores/examples/lancedb.html
- LangChain JS/TS 🔗: https://js.langchain.com/docs/modules/data_connection/vectorstores/integrations/lancedb
- LlamaIndex 🦙: https://gpt-index.readthedocs.io/en/latest/examples/vector_stores/LanceDBIndexDemo.html
- Pydantic: python/pydantic.md
- Voxel51: integrations/voxel51.md
- PromptTools: integrations/prompttools.md
- 🎯 Examples:
- Overview: examples/index.md
- 🐍 Python:
- Overview: examples/examples_python.md
- Home:
- LanceDB: index.md
- 🏃🏼‍♂️ Quick start: basic.md
- 📚 Concepts:
- Vector search: concepts/vector_search.md
- Indexing: concepts/index_ivfpq.md
- Storage: concepts/storage.md
- Data management: concepts/data_management.md
- 🔨 Guides:
- Working with tables: guides/tables.md
- Building an ANN index: ann_indexes.md
- Vector Search: search.md
- Full-text search: fts.md
- Hybrid search:
- Overview: hybrid_search/hybrid_search.md
- Comparing Rerankers: hybrid_search/eval.md
- Airbnb financial data example: notebooks/hybrid_search.ipynb
- Filtering: sql.md
- Versioning & Reproducibility: notebooks/reproducibility.ipynb
- Configuring Storage: guides/storage.md
- Sync -> Async Migration Guide: migration.md
- 🧬 Managing embeddings:
- Overview: embeddings/index.md
- Embedding functions: embeddings/embedding_functions.md
- Available models: embeddings/default_embedding_functions.md
- User-defined embedding functions: embeddings/custom_embedding_function.md
- "Example: Multi-lingual semantic search": notebooks/multi_lingual_example.ipynb
- "Example: MultiModal CLIP Embeddings": notebooks/DisappearingEmbeddingFunction.ipynb
- 🔌 Integrations:
- Tools and data formats: integrations/index.md
- Pandas and PyArrow: python/pandas_and_pyarrow.md
- Polars: python/polars_arrow.md
- DuckDB: python/duckdb.md
- LangChain 🔗: https://python.langchain.com/en/latest/modules/indexes/vectorstores/examples/lancedb.html
- LangChain JS/TS 🔗: https://js.langchain.com/docs/modules/data_connection/vectorstores/integrations/lancedb
- LlamaIndex 🦙: https://gpt-index.readthedocs.io/en/latest/examples/vector_stores/LanceDBIndexDemo.html
- Pydantic: python/pydantic.md
- Voxel51: integrations/voxel51.md
- PromptTools: integrations/prompttools.md
- 🎯 Examples:
- Overview: examples/index.md
- 🐍 Python:
- Overview: examples/examples_python.md
- YouTube Transcript Search: notebooks/youtube_transcript_search.ipynb
- Documentation QA Bot using LangChain: notebooks/code_qa_bot.ipynb
- Multimodal search using CLIP: notebooks/multimodal_search.ipynb
- Example - Calculate CLIP Embeddings with Roboflow Inference: examples/image_embeddings_roboflow.md
- Serverless QA Bot with S3 and Lambda: examples/serverless_lancedb_with_s3_and_lambda.md
- Serverless QA Bot with Modal: examples/serverless_qa_bot_with_modal_and_langchain.md
- 👾 JavaScript:
- Overview: examples/examples_js.md
- Serverless Website Chatbot: examples/serverless_website_chatbot.md
- YouTube Transcript Search: examples/youtube_transcript_bot_with_nodejs.md
- TransformersJS Embedding Search: examples/transformerjs_embedding_search_nodejs.md
- 🦀 Rust:
- Overview: examples/examples_rust.md
- 🔧 CLI & Config: cli_config.md
- 💭 FAQs: faq.md
- ⚙️ API reference:
- 🐍 Python: python/python.md
- 👾 JavaScript: javascript/modules.md
- 🦀 Rust: https://docs.rs/lancedb/latest/lancedb/
- ☁️ LanceDB Cloud:
- Overview: cloud/index.md
- API reference:
- 🐍 Python: python/saas-python.md
- 👾 JavaScript: javascript/saas-modules.md
- Quick start: basic.md
- Concepts:
- Vector search: concepts/vector_search.md
- Indexing: concepts/index_ivfpq.md
- Storage: concepts/storage.md
- Data management: concepts/data_management.md
- Guides:
- Working with tables: guides/tables.md
- Building an ANN index: ann_indexes.md
- Vector Search: search.md
- Full-text search: fts.md
- Hybrid search:
- Overview: hybrid_search/hybrid_search.md
- Comparing Rerankers: hybrid_search/eval.md
- Airbnb financial data example: notebooks/hybrid_search.ipynb
- Filtering: sql.md
- Versioning & Reproducibility: notebooks/reproducibility.ipynb
- Configuring Storage: guides/storage.md
- Sync -> Async Migration Guide: migration.md
- Managing Embeddings:
- Overview: embeddings/index.md
- Embedding functions: embeddings/embedding_functions.md
- Available models: embeddings/default_embedding_functions.md
- User-defined embedding functions: embeddings/custom_embedding_function.md
- "Example: Multi-lingual semantic search": notebooks/multi_lingual_example.ipynb
- "Example: MultiModal CLIP Embeddings": notebooks/DisappearingEmbeddingFunction.ipynb
- Integrations:
- Overview: integrations/index.md
- Pandas and PyArrow: python/pandas_and_pyarrow.md
- Polars: python/polars_arrow.md
- DuckDB: python/duckdb.md
- LangChain 🦜️🔗↗: https://python.langchain.com/en/latest/modules/indexes/vectorstores/examples/lancedb.html
- LangChain.js 🦜️🔗↗: https://js.langchain.com/docs/modules/data_connection/vectorstores/integrations/lancedb
- LlamaIndex 🦙↗: https://gpt-index.readthedocs.io/en/latest/examples/vector_stores/LanceDBIndexDemo.html
- Pydantic: python/pydantic.md
- Voxel51: integrations/voxel51.md
- PromptTools: integrations/prompttools.md
- Examples:
- examples/index.md
- YouTube Transcript Search: notebooks/youtube_transcript_search.ipynb
- Documentation QA Bot using LangChain: notebooks/code_qa_bot.ipynb
- Multimodal search using CLIP: notebooks/multimodal_search.ipynb
- Example - Calculate CLIP Embeddings with Roboflow Inference: examples/image_embeddings_roboflow.md
- Serverless QA Bot with S3 and Lambda: examples/serverless_lancedb_with_s3_and_lambda.md
- Serverless QA Bot with Modal: examples/serverless_qa_bot_with_modal_and_langchain.md
- 👾 JavaScript:
- Overview: examples/examples_js.md
- Serverless Website Chatbot: examples/serverless_website_chatbot.md
- YouTube Transcript Search: examples/youtube_transcript_bot_with_nodejs.md
- YouTube Transcript Search (JS): examples/youtube_transcript_bot_with_nodejs.md
- Serverless Chatbot from any website: examples/serverless_website_chatbot.md
- TransformersJS Embedding Search: examples/transformerjs_embedding_search_nodejs.md
- 🦀 Rust:
- Overview: examples/examples_rust.md
- 🔧 CLI & Config: cli_config.md
- 💭 FAQs: faq.md
- ⚙️ API reference:
- 🐍 Python: python/python.md
- 👾 JavaScript: javascript/modules.md
- 🦀 Rust: https://docs.rs/lancedb/latest/lancedb/
- ☁️ LanceDB Cloud:
- Overview: cloud/index.md
- API reference:
- 🐍 Python: python/saas-python.md
- 👾 JavaScript: javascript/saas-modules.md
- Quick start: basic.md
- Concepts:
- Vector search: concepts/vector_search.md
- Indexing: concepts/index_ivfpq.md
- Storage: concepts/storage.md
- Data management: concepts/data_management.md
- Guides:
- Working with tables: guides/tables.md
- Building an ANN index: ann_indexes.md
- Vector Search: search.md
- Full-text search: fts.md
- Hybrid search:
- Overview: hybrid_search/hybrid_search.md
- Comparing Rerankers: hybrid_search/eval.md
- Airbnb financial data example: notebooks/hybrid_search.ipynb
- Filtering: sql.md
- Versioning & Reproducibility: notebooks/reproducibility.ipynb
- Configuring Storage: guides/storage.md
- Managing Embeddings:
- Overview: embeddings/index.md
- Embedding functions: embeddings/embedding_functions.md
- Available models: embeddings/default_embedding_functions.md
- User-defined embedding functions: embeddings/custom_embedding_function.md
- "Example: Multi-lingual semantic search": notebooks/multi_lingual_example.ipynb
- "Example: MultiModal CLIP Embeddings": notebooks/DisappearingEmbeddingFunction.ipynb
- Integrations:
- Overview: integrations/index.md
- Pandas and PyArrow: python/pandas_and_pyarrow.md
- Polars: python/polars_arrow.md
- DuckDB : python/duckdb.md
- LangChain 🦜️🔗↗: https://python.langchain.com/en/latest/modules/indexes/vectorstores/examples/lancedb.html
- LangChain.js 🦜️🔗↗: https://js.langchain.com/docs/modules/data_connection/vectorstores/integrations/lancedb
- LlamaIndex 🦙↗: https://gpt-index.readthedocs.io/en/latest/examples/vector_stores/LanceDBIndexDemo.html
- Pydantic: python/pydantic.md
- Voxel51: integrations/voxel51.md
- PromptTools: integrations/prompttools.md
- Examples:
- examples/index.md
- YouTube Transcript Search: notebooks/youtube_transcript_search.ipynb
- Documentation QA Bot using LangChain: notebooks/code_qa_bot.ipynb
- Multimodal search using CLIP: notebooks/multimodal_search.ipynb
- Serverless QA Bot with S3 and Lambda: examples/serverless_lancedb_with_s3_and_lambda.md
- Serverless QA Bot with Modal: examples/serverless_qa_bot_with_modal_and_langchain.md
- YouTube Transcript Search (JS): examples/youtube_transcript_bot_with_nodejs.md
- Serverless Chatbot from any website: examples/serverless_website_chatbot.md
- TransformersJS Embedding Search: examples/transformerjs_embedding_search_nodejs.md
- API reference:
- Overview: api_reference.md
- Python: python/python.md
- Javascript: javascript/modules.md
- Rust: https://docs.rs/lancedb/latest/lancedb/index.html
- LanceDB Cloud:
- Overview: cloud/index.md
- API reference:
- 🐍 Python: python/saas-python.md
- 👾 JavaScript: javascript/saas-modules.md
- API reference:
- Overview: api_reference.md
- Python: python/python.md
- Javascript: javascript/modules.md
- Rust: https://docs.rs/lancedb/latest/lancedb/index.html
- LanceDB Cloud:
- Overview: cloud/index.md
- API reference:
- 🐍 Python: python/saas-python.md
- 👾 JavaScript: javascript/saas-modules.md
extra_css:
- styles/global.css


@@ -48,11 +48,20 @@
=== "Python"
```python
import lancedb
uri = "data/sample-lancedb"
db = lancedb.connect(uri)
```
```python
--8<-- "python/python/tests/docs/test_basic.py:imports"
--8<-- "python/python/tests/docs/test_basic.py:connect"
--8<-- "python/python/tests/docs/test_basic.py:connect_async"
```
!!! note "Asynchronous Python API"
The asynchronous Python API is new and has some slight differences compared
to the synchronous API. Feel free to start using the asynchronous version.
Once all features have migrated we will start to move the synchronous API to
use the same syntax as the asynchronous API. To help with this migration we
have created a [migration guide](migration.md) detailing the differences.
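
For readers without the snippet sources, here is a minimal sketch of what the included fragments demonstrate (per the migration guide added in this change, the asynchronous entry point is `connect_async`; the exact snippet contents are assumptions):

```python
import lancedb

# Synchronous client
db = lancedb.connect("data/sample-lancedb")

# Asynchronous client (new Rust-backed API)
async def main():
    async_db = await lancedb.connect_async("data/sample-lancedb")
    print(await async_db.table_names())
```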
=== "Typescript"
@@ -82,15 +91,14 @@ If you need a reminder of the uri, you can call `db.uri()`.
### Create a table from initial data
If you have data to insert into the table at creation time, you can simultaneously create a
table and insert the data into it. The schema of the data will be used as the schema of the
table.
=== "Python"
```python
tbl = db.create_table("my_table",
data=[{"vector": [3.1, 4.1], "item": "foo", "price": 10.0},
{"vector": [5.9, 26.5], "item": "bar", "price": 20.0}])
--8<-- "python/python/tests/docs/test_basic.py:create_table"
--8<-- "python/python/tests/docs/test_basic.py:create_table_async"
```
If the table already exists, LanceDB will raise an error by default.
@@ -100,10 +108,8 @@ table.
You can also pass in a pandas DataFrame directly:
```python
import pandas as pd
df = pd.DataFrame([{"vector": [3.1, 4.1], "item": "foo", "price": 10.0},
{"vector": [5.9, 26.5], "item": "bar", "price": 20.0}])
tbl = db.create_table("table_from_df", data=df)
--8<-- "python/python/tests/docs/test_basic.py:create_table_pandas"
--8<-- "python/python/tests/docs/test_basic.py:create_table_async_pandas"
```
=== "Typescript"
@@ -138,15 +144,14 @@ table.
Sometimes you may not have the data to insert into the table at creation time.
In this case, you can create an empty table and specify the schema, so that you can add
data to the table at a later time (as long as it conforms to the schema). This is
similar to a `CREATE TABLE` statement in SQL.
=== "Python"
```python
import pyarrow as pa
schema = pa.schema([pa.field("vector", pa.list_(pa.float32(), list_size=2))])
tbl = db.create_table("empty_table", schema=schema)
--8<-- "python/python/tests/docs/test_basic.py:create_empty_table"
--8<-- "python/python/tests/docs/test_basic.py:create_empty_table_async"
```
=== "Typescript"
@@ -168,7 +173,8 @@ Once created, you can open a table as follows:
=== "Python"
```python
tbl = db.open_table("my_table")
--8<-- "python/python/tests/docs/test_basic.py:open_table"
--8<-- "python/python/tests/docs/test_basic.py:open_table_async"
```
=== "Typescript"
@@ -188,7 +194,8 @@ If you forget the name of your table, you can always get a listing of all table
=== "Python"
```python
print(db.table_names())
--8<-- "python/python/tests/docs/test_basic.py:table_names"
--8<-- "python/python/tests/docs/test_basic.py:table_names_async"
```
=== "Javascript"
@@ -210,15 +217,8 @@ After a table has been created, you can always add more data to it as follows:
=== "Python"
```python
# Option 1: Add a list of dicts to a table
data = [{"vector": [1.3, 1.4], "item": "fizz", "price": 100.0},
{"vector": [9.5, 56.2], "item": "buzz", "price": 200.0}]
tbl.add(data)
# Option 2: Add a pandas DataFrame to a table
df = pd.DataFrame(data)
tbl.add(df)
--8<-- "python/python/tests/docs/test_basic.py:add_data"
--8<-- "python/python/tests/docs/test_basic.py:add_data_async"
```
=== "Typescript"
@@ -240,7 +240,8 @@ Once you've embedded the query, you can find its nearest neighbors as follows:
=== "Python"
```python
tbl.search([100, 100]).limit(2).to_pandas()
--8<-- "python/python/tests/docs/test_basic.py:vector_search"
--8<-- "python/python/tests/docs/test_basic.py:vector_search_async"
```
This returns a pandas DataFrame with the results.
@@ -274,7 +275,8 @@ LanceDB allows you to create an ANN index on a table as follows:
=== "Python"
```py
tbl.create_index()
--8<-- "python/python/tests/docs/test_basic.py:create_index"
--8<-- "python/python/tests/docs/test_basic.py:create_index_async"
```
=== "Typescript"
@@ -286,15 +288,15 @@ LanceDB allows you to create an ANN index on a table as follows:
=== "Rust"
```rust
--8<-- "rust/lancedb/examples/simple.rs:create_index"
```
!!! note "Why do I need to create an index manually?"
LanceDB does not automatically create the ANN index for two reasons. The first is that it's optimized
for really fast retrievals via a disk-based index, and the second is that data and query workloads can
be very diverse, so there's no one-size-fits-all index configuration. LanceDB provides many parameters
to fine-tune index size, query latency and accuracy. See the section on
[ANN indexes](ann_indexes.md) for more details.
## Delete rows from a table
@@ -305,7 +307,8 @@ This can delete any number of rows that match the filter.
=== "Python"
```python
tbl.delete('item = "fizz"')
--8<-- "python/python/tests/docs/test_basic.py:delete_rows"
--8<-- "python/python/tests/docs/test_basic.py:delete_rows_async"
```
=== "Typescript"
@@ -322,7 +325,7 @@ This can delete any number of rows that match the filter.
The deletion predicate is a SQL expression that supports the same expressions
as the `where()` clause (`only_if()` in Rust) on a search. They can be as
simple or complex as needed. To see what expressions are supported, see the
[SQL filters](sql.md) section.
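
For instance, a hedged sketch of a compound predicate (the `AND` form is an assumption based on the SQL filter syntax documented in the linked section):

```python
# Delete all rows matching a compound SQL predicate.
tbl.delete('item = "fizz" AND price > 100.0')
```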
=== "Python"
@@ -344,7 +347,8 @@ Use the `drop_table()` method on the database to remove a table.
=== "Python"
```python
db.drop_table("my_table")
--8<-- "python/python/tests/docs/test_basic.py:drop_table"
--8<-- "python/python/tests/docs/test_basic.py:drop_table_async"
```
This permanently removes the table and is not recoverable, unlike deleting rows.


@@ -19,27 +19,163 @@ Allows you to set parameters when registering a `sentence-transformers` object.
| `normalize` | `bool` | `True` | Whether to normalize the input text before feeding it to the model |
```python
import lancedb
from lancedb.embeddings import EmbeddingFunctionRegistry

db = lancedb.connect("/tmp/db")
registry = EmbeddingFunctionRegistry.get_instance()
func = registry.get("sentence-transformers").create(device="cpu")
```
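
A hedged sketch combining the parameters from the table above (the model name is an illustrative assumption; any model from the list below should work):

```python
func = registry.get("sentence-transformers").create(
    name="sentence-transformers/all-MiniLM-L6-v2",  # assumed example model
    device="cpu",
    normalize=True,
)
```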
??? "Check out available sentence-transformer models here!"
```markdown
- sentence-transformers/all-MiniLM-L12-v2
- sentence-transformers/paraphrase-mpnet-base-v2
- sentence-transformers/gtr-t5-base
- sentence-transformers/LaBSE
- sentence-transformers/all-MiniLM-L6-v2
- sentence-transformers/bert-base-nli-max-tokens
- sentence-transformers/bert-base-nli-mean-tokens
- sentence-transformers/bert-base-nli-stsb-mean-tokens
- sentence-transformers/bert-base-wikipedia-sections-mean-tokens
- sentence-transformers/bert-large-nli-cls-token
- sentence-transformers/bert-large-nli-max-tokens
- sentence-transformers/bert-large-nli-mean-tokens
- sentence-transformers/bert-large-nli-stsb-mean-tokens
- sentence-transformers/distilbert-base-nli-max-tokens
- sentence-transformers/distilbert-base-nli-mean-tokens
- sentence-transformers/distilbert-base-nli-stsb-mean-tokens
- sentence-transformers/distilroberta-base-msmarco-v1
- sentence-transformers/distilroberta-base-msmarco-v2
- sentence-transformers/nli-bert-base-cls-pooling
- sentence-transformers/nli-bert-base-max-pooling
- sentence-transformers/nli-bert-base
- sentence-transformers/nli-bert-large-cls-pooling
- sentence-transformers/nli-bert-large-max-pooling
- sentence-transformers/nli-bert-large
- sentence-transformers/nli-distilbert-base-max-pooling
- sentence-transformers/nli-distilbert-base
- sentence-transformers/nli-roberta-base
- sentence-transformers/nli-roberta-large
- sentence-transformers/roberta-base-nli-mean-tokens
- sentence-transformers/roberta-base-nli-stsb-mean-tokens
- sentence-transformers/roberta-large-nli-mean-tokens
- sentence-transformers/roberta-large-nli-stsb-mean-tokens
- sentence-transformers/stsb-bert-base
- sentence-transformers/stsb-bert-large
- sentence-transformers/stsb-distilbert-base
- sentence-transformers/stsb-roberta-base
- sentence-transformers/stsb-roberta-large
- sentence-transformers/xlm-r-100langs-bert-base-nli-mean-tokens
- sentence-transformers/xlm-r-100langs-bert-base-nli-stsb-mean-tokens
- sentence-transformers/xlm-r-base-en-ko-nli-ststb
- sentence-transformers/xlm-r-bert-base-nli-mean-tokens
- sentence-transformers/xlm-r-bert-base-nli-stsb-mean-tokens
- sentence-transformers/xlm-r-large-en-ko-nli-ststb
- sentence-transformers/bert-base-nli-cls-token
- sentence-transformers/all-distilroberta-v1
- sentence-transformers/multi-qa-MiniLM-L6-dot-v1
- sentence-transformers/multi-qa-distilbert-cos-v1
- sentence-transformers/multi-qa-distilbert-dot-v1
- sentence-transformers/multi-qa-mpnet-base-cos-v1
- sentence-transformers/multi-qa-mpnet-base-dot-v1
- sentence-transformers/nli-distilroberta-base-v2
- sentence-transformers/all-MiniLM-L6-v1
- sentence-transformers/all-mpnet-base-v1
- sentence-transformers/all-mpnet-base-v2
- sentence-transformers/all-roberta-large-v1
- sentence-transformers/allenai-specter
- sentence-transformers/average_word_embeddings_glove.6B.300d
- sentence-transformers/average_word_embeddings_glove.840B.300d
- sentence-transformers/average_word_embeddings_komninos
- sentence-transformers/average_word_embeddings_levy_dependency
- sentence-transformers/clip-ViT-B-32-multilingual-v1
- sentence-transformers/clip-ViT-B-32
- sentence-transformers/distilbert-base-nli-stsb-quora-ranking
- sentence-transformers/distilbert-multilingual-nli-stsb-quora-ranking
- sentence-transformers/distilroberta-base-paraphrase-v1
- sentence-transformers/distiluse-base-multilingual-cased-v1
- sentence-transformers/distiluse-base-multilingual-cased-v2
- sentence-transformers/distiluse-base-multilingual-cased
- sentence-transformers/facebook-dpr-ctx_encoder-multiset-base
- sentence-transformers/facebook-dpr-ctx_encoder-single-nq-base
- sentence-transformers/facebook-dpr-question_encoder-multiset-base
- sentence-transformers/facebook-dpr-question_encoder-single-nq-base
- sentence-transformers/gtr-t5-large
- sentence-transformers/gtr-t5-xl
- sentence-transformers/gtr-t5-xxl
- sentence-transformers/msmarco-MiniLM-L-12-v3
- sentence-transformers/msmarco-MiniLM-L-6-v3
- sentence-transformers/msmarco-MiniLM-L12-cos-v5
- sentence-transformers/msmarco-MiniLM-L6-cos-v5
- sentence-transformers/msmarco-bert-base-dot-v5
- sentence-transformers/msmarco-bert-co-condensor
- sentence-transformers/msmarco-distilbert-base-dot-prod-v3
- sentence-transformers/msmarco-distilbert-base-tas-b
- sentence-transformers/msmarco-distilbert-base-v2
- sentence-transformers/msmarco-distilbert-base-v3
- sentence-transformers/msmarco-distilbert-base-v4
- sentence-transformers/msmarco-distilbert-cos-v5
- sentence-transformers/msmarco-distilbert-dot-v5
- sentence-transformers/msmarco-distilbert-multilingual-en-de-v2-tmp-lng-aligned
- sentence-transformers/msmarco-distilbert-multilingual-en-de-v2-tmp-trained-scratch
- sentence-transformers/msmarco-distilroberta-base-v2
- sentence-transformers/msmarco-roberta-base-ance-firstp
- sentence-transformers/msmarco-roberta-base-v2
- sentence-transformers/msmarco-roberta-base-v3
- sentence-transformers/multi-qa-MiniLM-L6-cos-v1
- sentence-transformers/nli-mpnet-base-v2
- sentence-transformers/nli-roberta-base-v2
- sentence-transformers/nq-distilbert-base-v1
- sentence-transformers/paraphrase-MiniLM-L12-v2
- sentence-transformers/paraphrase-MiniLM-L3-v2
- sentence-transformers/paraphrase-MiniLM-L6-v2
- sentence-transformers/paraphrase-TinyBERT-L6-v2
- sentence-transformers/paraphrase-albert-base-v2
- sentence-transformers/paraphrase-albert-small-v2
- sentence-transformers/paraphrase-distilroberta-base-v1
- sentence-transformers/paraphrase-distilroberta-base-v2
- sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2
- sentence-transformers/paraphrase-multilingual-mpnet-base-v2
- sentence-transformers/paraphrase-xlm-r-multilingual-v1
- sentence-transformers/quora-distilbert-base
- sentence-transformers/quora-distilbert-multilingual
- sentence-transformers/sentence-t5-base
- sentence-transformers/sentence-t5-large
- sentence-transformers/sentence-t5-xxl
- sentence-transformers/sentence-t5-xl
- sentence-transformers/stsb-distilroberta-base-v2
- sentence-transformers/stsb-mpnet-base-v2
- sentence-transformers/stsb-roberta-base-v2
- sentence-transformers/stsb-xlm-r-multilingual
- sentence-transformers/xlm-r-distilroberta-base-paraphrase-v1
- sentence-transformers/clip-ViT-L-14
- sentence-transformers/clip-ViT-B-16
- sentence-transformers/use-cmlm-multilingual
- sentence-transformers/all-MiniLM-L12-v1
```
```python
class Words(LanceModel):
    text: str = func.SourceField()
    vector: Vector(func.ndims()) = func.VectorField()

table = db.create_table("words", schema=Words)
table.add(
    [
        {"text": "hello world"},
        {"text": "goodbye world"},
    ]
)
```

!!! info
    You can also load many other model architectures from the library, for example models from BAAI, Nomic, Salesforce Research, and others.
    See the HF hub page for all [supported models](https://huggingface.co/models?library=sentence-transformers).
!!! note "BAAI Embeddings example"
    Here is an example that uses the BAAI embedding model from the HuggingFace Hub ([supported models](https://huggingface.co/models?library=sentence-transformers)):
```python
db = lancedb.connect("/tmp/db")
registry = EmbeddingFunctionRegistry.get_instance()
model = registry.get("sentence-transformers").create(name="BAAI/bge-small-en-v1.5", device="cpu")
class Words(LanceModel):
text: str = model.SourceField()
vector: Vector(model.ndims()) = model.VectorField()
table = db.create_table("words", schema=Words)
table.add(
[
{"text": "hello world"}
{"text": "goodbye world"}
]
)
query = "greetings"
actual = table.search(query).limit(1).to_pydantic(Words)[0]
print(actual.text)
```
Visit the sentence-transformers [HuggingFace Hub](https://huggingface.co/sentence-transformers) page for more information on the available models.
### OpenAI embeddings
LanceDB registers the OpenAI embeddings function in the registry by default, as `openai`. Below are the parameters that you can customize when creating the instances:
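
A minimal sketch, assuming the same registry pattern as the sentence-transformers examples above (the model name mirrors one used in the example scripts elsewhere in this diff):

```python
from lancedb.embeddings import get_registry

openai_func = get_registry().get("openai").create(name="text-embedding-3-large")
```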


@@ -0,0 +1,150 @@
import json
from tqdm import tqdm
import pandas as pd
import os
import requests
from llama_index.core import ServiceContext, VectorStoreIndex, StorageContext
from llama_index.core.schema import TextNode
from llama_index.vector_stores.lancedb import LanceDBVectorStore
from lancedb.rerankers import CrossEncoderReranker, ColbertReranker, CohereReranker, LinearCombinationReranker
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.embeddings.openai import OpenAIEmbedding
from lancedb.pydantic import LanceModel, Vector
from lancedb.embeddings import get_registry
from lancedb.embeddings.fine_tuner.dataset import QADataset, TextChunk, DEFAULT_PROMPT_TMPL
from llama_index.core import SimpleDirectoryReader
from llama_index.core.node_parser import SentenceSplitter
from lancedb.embeddings.fine_tuner.llm import Openai
import time
import lancedb
import wandb
from pydantic import BaseModel, root_validator
from typing import Optional
TRAIN_DATASET_FPATH = './data/train_dataset.json'
VAL_DATASET_FPATH = './data/val_dataset.json'
with open(TRAIN_DATASET_FPATH, 'r+') as f:
train_dataset = json.load(f)
with open(VAL_DATASET_FPATH, 'r+') as f:
val_dataset = json.load(f)
def train_embedding_model(epoch):
def download_test_files(url):
# download to cwd
files = []
filename = os.path.basename(url)
if not os.path.exists(filename):
print(f"Downloading {url} to {filename}")
r = requests.get(url)
with open(filename, 'wb') as f:
f.write(r.content)
files.append(filename)
return files
def get_dataset(url, name):
reader = SimpleDirectoryReader(input_files=download_test_files(url))
docs = reader.load_data()
parser = SentenceSplitter()
nodes = parser.get_nodes_from_documents(docs)
if os.path.exists(name):
ds = QADataset.load(name)
else:
llm = Openai()
# convert Llama-index TextNode to TextChunk
chunks = [TextChunk.from_llama_index_node(node) for node in nodes]
ds = QADataset.from_llm(chunks, llm, num_questions_per_chunk=2)
ds.save(name)
return ds
train_url = 'https://raw.githubusercontent.com/run-llama/llama_index/main/docs/examples/data/10k/uber_2021.pdf'
ds = get_dataset(train_url, "qa_dataset_uber")
model = get_registry().get("sentence-transformers").create(name="BAAI/bge-small-en-v1.5")
model.finetune(trainset=ds, valset=None, path="model_airbnb", epochs=epoch, log_wandb=True, run_name="lyft_finetune")
def evaluate(
dataset,
embed_model,
reranker=None,
top_k=5,
verbose=False,
):
corpus = dataset['corpus']
queries = dataset['queries']
relevant_docs = dataset['relevant_docs']
vector_store = LanceDBVectorStore(uri="/tmp/lancedb")
storage_context = StorageContext.from_defaults(vector_store=vector_store)
service_context = ServiceContext.from_defaults(embed_model=embed_model)
nodes = [TextNode(id_=id_, text=text) for id_, text in corpus.items()]
index = VectorStoreIndex(
nodes,
service_context=service_context,
show_progress=True,
storage_context=storage_context,
)
tbl = vector_store.connection.open_table(vector_store.table_name)
tbl.create_fts_index("text", replace=True)
eval_results = []
for query_id, query in tqdm(queries.items()):
query_vector = embed_model.get_query_embedding(query)
try:
if reranker is None:
rs = tbl.search(query_vector).limit(top_k).to_pandas()
else:
rs = tbl.search((query_vector, query)).rerank(reranker=reranker).limit(top_k).to_pandas()
except Exception as e:
print(f'Error with query: {query_id} {e}')
continue
retrieved_ids = rs['id'].tolist()[:top_k]
expected_id = relevant_docs[query_id][0]
is_hit = expected_id in retrieved_ids # assume 1 relevant doc
if len(eval_results) == 0:
print(f"Query: {query}")
print(f"Expected: {expected_id}")
print(f"Retrieved: {retrieved_ids}")
eval_result = {
'is_hit': is_hit,
'retrieved': retrieved_ids,
'expected': expected_id,
'query': query_id,
}
eval_results.append(eval_result)
return eval_results
if __name__ == '__main__':
train_embedding_model(4)
#embed_model = OpenAIEmbedding() # model="text-embedding-3-small"
rerankers = {
"Vector Search": None,
"Cohere": CohereReranker(),
"Cross Encoder": CrossEncoderReranker(),
"Colbert": ColbertReranker(),
"linear": LinearCombinationReranker(),
}
top_ks = [3]
for top_k in top_ks:
#for epoch in epochs:
for name, reranker in rerankers.items():
#embed_model = HuggingFaceEmbedding("./model_airbnb")
embed_model = OpenAIEmbedding()
wandb.init(project=f"Reranker-based", name=name)
val_eval_results = evaluate(val_dataset, embed_model, reranker=reranker, top_k=top_k)
df = pd.DataFrame(val_eval_results)
hit_rate = df['is_hit'].mean()
print(f'Hit rate: {hit_rate:.2f}')
wandb.log({f"openai_base_hit_rate_@{top_k}": hit_rate})
wandb.finish()


@@ -0,0 +1,71 @@
import os
import json
import lancedb
import pandas as pd
from lancedb.embeddings.fine_tuner.llm import Openai
from lancedb.embeddings.fine_tuner.dataset import QADataset, TextChunk
from lancedb.pydantic import LanceModel, Vector
from llama_index.core import SimpleDirectoryReader
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.schema import MetadataMode
from lancedb.embeddings import get_registry
test_url = 'https://raw.githubusercontent.com/run-llama/llama_index/main/docs/examples/data/10k/lyft_2021.pdf'
train_url = 'https://raw.githubusercontent.com/run-llama/llama_index/main/docs/examples/data/10k/uber_2021.pdf'
def download_test_files(url):
import os
import requests
# download to cwd
files = []
filename = os.path.basename(url)
if not os.path.exists(filename):
print(f"Downloading {url} to {filename}")
r = requests.get(url)
with open(filename, 'wb') as f:
f.write(r.content)
files.append(filename)
return files
def get_dataset(url, name):
reader = SimpleDirectoryReader(input_files=download_test_files(url))
docs = reader.load_data()
parser = SentenceSplitter()
nodes = parser.get_nodes_from_documents(docs)
if os.path.exists(name):
ds = QADataset.load(name)
else:
llm = Openai()
# convert Llama-index TextNode to TextChunk
chunks = [TextChunk.from_llama_index_node(node) for node in nodes]
ds = QADataset.from_llm(chunks, llm)
ds.save(name)
return ds
trainset = get_dataset(test_url, "qa_dataset_1")
valset = get_dataset(train_url, "valset")
model = get_registry().get("sentence-transformers").create()
model.finetune(trainset=trainset, valset=valset, path="model_finetuned_1", epochs=4)
base = get_registry().get("sentence-transformers").create()
tuned = get_registry().get("sentence-transformers").create(name="./model_finetuned_1")
openai = get_registry().get("openai").create(name="text-embedding-3-large")
rs1 = base.evaluate(valset, path="val_res")
rs2 = tuned.evaluate(valset, path="val_res")
rs3 = openai.evaluate(valset)
print("openai-embedding-v3 hit-rate - ", pd.DataFrame(rs3)["is_hit"].mean())
print("fine-tuned hit-rate - ", pd.DataFrame(rs2)["is_hit"].mean())
print("Base model hite-rate - ", pd.DataFrame(rs1)["is_hit"].mean())


@@ -0,0 +1,119 @@
import os
import re
import json
import uuid
import lancedb
import pandas as pd
from tqdm import tqdm
from lancedb.embeddings.fine_tuner.llm import Openai
from lancedb.embeddings.fine_tuner.dataset import QADataset, TextChunk, DEFAULT_PROMPT_TMPL
from lancedb.pydantic import LanceModel, Vector
from llama_index.core import SimpleDirectoryReader
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.schema import MetadataMode
from lancedb.embeddings import get_registry
test_url = 'https://raw.githubusercontent.com/run-llama/llama_index/main/docs/examples/data/10k/lyft_2021.pdf'
train_url = 'https://raw.githubusercontent.com/run-llama/llama_index/main/docs/examples/data/10k/uber_2021.pdf'
def download_test_files(url):
import os
import requests
# download to cwd
files = []
filename = os.path.basename(url)
if not os.path.exists(filename):
print(f"Downloading {url} to {filename}")
r = requests.get(url)
with open(filename, 'wb') as f:
f.write(r.content)
files.append(filename)
return files
def get_node(url):
reader = SimpleDirectoryReader(input_files=download_test_files(url))
docs = reader.load_data()
parser = SentenceSplitter()
nodes = parser.get_nodes_from_documents(docs)
return nodes
def get_dataset(url, name):
reader = SimpleDirectoryReader(input_files=download_test_files(url))
docs = reader.load_data()
parser = SentenceSplitter()
nodes = parser.get_nodes_from_documents(docs)
if os.path.exists(name):
ds = QADataset.load(name)
else:
llm = Openai()
# convert Llama-index TextNode to TextChunk
chunks = [TextChunk.from_llama_index_node(node) for node in nodes]
ds = QADataset.from_llm(chunks, llm)
ds.save(name)
return ds
nodes = get_node(train_url)
db = lancedb.connect("~/lancedb/fine-tuning")
model = get_registry().get("openai").create()
class Schema(LanceModel):
id: str
text: str = model.SourceField()
vector: Vector(model.ndims()) = model.VectorField()
retriever = db.create_table("fine-tuning", schema=Schema, mode="overwrite")
pylist = [{"id": str(node.node_id), "text": node.text} for node in nodes]
retriever.add(pylist)
ds_name = "response_data"
if os.path.exists(ds_name):
ds = QADataset.load(ds_name)
else:
# Generate questions
llm = Openai()
text_chunks = [TextChunk.from_llama_index_node(node) for node in nodes]
queries = {}
relevant_docs = {}
for chunk in tqdm(text_chunks):
text = chunk.text
questions = llm.get_questions(DEFAULT_PROMPT_TMPL.format(context_str=text, num_questions_per_chunk=2))
for question in questions:
question_id = str(uuid.uuid4())
queries[question_id] = question
relevant_docs[question_id] = [retriever.search(question).to_pandas()["id"].tolist()[0]]
ds = QADataset.from_responses(text_chunks, queries, relevant_docs)
ds.save(ds_name)
# Fine-tune model
valset = get_dataset(train_url, "valset")
model = get_registry().get("sentence-transformers").create()
res_base = model.evaluate(valset)
model.finetune(trainset=ds, path="model_finetuned", epochs=4, log_wandb=True)
tuned = get_registry().get("sentence-transformers").create(name="./model_finetuned")
res_tuned = tuned.evaluate(valset)
openai_model = get_registry().get("openai").create()
#res_openai = openai_model.evaluate(valset)
#print(f"openai model results: {pd.DataFrame(res_openai)['is_hit'].mean()}")
print(f"base model results: {pd.DataFrame(res_base)['is_hit'].mean()}")
print(f"tuned model results: {pd.DataFrame(res_tuned)['is_hit'].mean()}")


@@ -1,11 +1,79 @@
document.addEventListener("DOMContentLoaded", function () {
var script = document.createElement("script");
script.src = "https://widget.kapa.ai/kapa-widget.bundle.js";
script.setAttribute("data-website-id", "c5881fae-cec0-490b-b45e-d83d131d4f25");
script.setAttribute("data-project-name", "LanceDB");
script.setAttribute("data-project-color", "#000000");
script.setAttribute("data-project-logo", "https://avatars.githubusercontent.com/u/108903835?s=200&v=4");
script.setAttribute("data-modal-example-questions","Help me create an IVF_PQ index,How do I do an exhaustive search?,How do I create a LanceDB table?,Can I use my own embedding function?");
script.async = true;
document.head.appendChild(script);
});
// Creates an SVG robot icon (from Lucide)
function robotSVG() {
var svg = document.createElementNS("http://www.w3.org/2000/svg", "svg");
svg.setAttribute("width", "24");
svg.setAttribute("height", "24");
svg.setAttribute("viewBox", "0 0 24 24");
svg.setAttribute("fill", "none");
svg.setAttribute("stroke", "currentColor");
svg.setAttribute("stroke-width", "2");
svg.setAttribute("stroke-linecap", "round");
svg.setAttribute("stroke-linejoin", "round");
svg.setAttribute("class", "lucide lucide-bot-message-square");
var path1 = document.createElementNS("http://www.w3.org/2000/svg", "path");
path1.setAttribute("d", "M12 6V2H8");
svg.appendChild(path1);
var path2 = document.createElementNS("http://www.w3.org/2000/svg", "path");
path2.setAttribute("d", "m8 18-4 4V8a2 2 0 0 1 2-2h12a2 2 0 0 1 2 2v8a2 2 0 0 1-2 2Z");
svg.appendChild(path2);
var path3 = document.createElementNS("http://www.w3.org/2000/svg", "path");
path3.setAttribute("d", "M2 12h2");
svg.appendChild(path3);
var path4 = document.createElementNS("http://www.w3.org/2000/svg", "path");
path4.setAttribute("d", "M9 11v2");
svg.appendChild(path4);
var path5 = document.createElementNS("http://www.w3.org/2000/svg", "path");
path5.setAttribute("d", "M15 11v2");
svg.appendChild(path5);
var path6 = document.createElementNS("http://www.w3.org/2000/svg", "path");
path6.setAttribute("d", "M20 12h2");
svg.appendChild(path6);
return svg
}
// Creates the Fluidic Chatbot button
function fluidicButton() {
var btn = document.createElement("a");
btn.href = "https://asklancedb.com";
btn.target = "_blank";
btn.style.position = "fixed";
btn.style.fontWeight = "bold";
btn.style.fontSize = ".8rem";
btn.style.right = "10px";
btn.style.bottom = "10px";
btn.style.width = "80px";
btn.style.height = "80px";
btn.style.background = "linear-gradient(135deg, #7C5EFF 0%, #625eff 100%)";
btn.style.color = "white";
btn.style.borderRadius = "5px";
btn.style.display = "flex";
btn.style.flexDirection = "column";
btn.style.justifyContent = "center";
btn.style.alignItems = "center";
btn.style.zIndex = "1000";
btn.style.opacity = "0";
btn.style.boxShadow = "0 0 0 rgba(0, 0, 0, 0)";
btn.style.transition = "opacity 0.2s ease-in, box-shadow 0.2s ease-in";
setTimeout(function() {
btn.style.opacity = "1";
btn.style.boxShadow = "0 0 .2rem #0000001a,0 .2rem .4rem #0003"
}, 0);
return btn
}
document.addEventListener("DOMContentLoaded", function() {
var btn = fluidicButton()
btn.appendChild(robotSVG());
var text = document.createTextNode("Ask AI");
btn.appendChild(text);
document.body.appendChild(btn);
});

76
docs/src/migration.md Normal file
View File

@@ -0,0 +1,76 @@
# Rust-backed Client Migration Guide
In an effort to ensure all clients have the same set of capabilities, we have begun migrating the
Python and Node clients onto a common Rust base library. In Python, this new client is part of
the same `lancedb` package, exposed as an asynchronous client. Once the asynchronous client has
reached full functionality, we will begin migrating the synchronous library to be a thin wrapper
around the asynchronous client.
This guide describes the differences between the two APIs and will hopefully assist users
who would like to migrate to the new API.
## Closeable Connections
The Connection now has a `close` method. You can call this when
you are done with the connection to eagerly free resources. Currently
this is limited to freeing/closing the HTTP connection for remote
connections. In the future we may add caching or other resources to
native connections, so closing is a good practice even if you
aren't using remote connections.
In addition, the connection can be used as a context manager, which may
be a more convenient way to ensure the connection is closed.
```python
import lancedb
async def my_async_fn():
with await lancedb.connect_async("my_uri") as db:
print(await db.table_names())
```
It is not mandatory to call the `close` method. If you do not call it
then the connection will be closed when the object is garbage collected.
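If you prefer explicit lifetime management over a context manager, a minimal
sketch (reusing the hypothetical URI from the example above) looks like this:
```python
import lancedb

async def my_async_fn():
    db = await lancedb.connect_async("my_uri")
    try:
        print(await db.table_names())
    finally:
        # eagerly free the connection's resources instead of
        # waiting for garbage collection
        db.close()
```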
## Closeable Table
The Table now also has a `close` method, similar to the connection. This
can be used to eagerly free the cache used by a Table object. Like the
connection, it can be used as a context manager, and it is not mandatory
to call the `close` method.
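For example (a sketch assuming a table named `my_table` already exists):
```python
import lancedb

async def read_table():
    with await lancedb.connect_async("my_uri") as db:
        with await db.open_table("my_table") as table:
            print(await table.count_rows())
        # the table's cache is released here; the connection closes next
```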
### Changes to Table APIs
- Previously `Table.schema` was a property. Now it is an async method.
- The method `Table.__len__` was removed and `len(table)` will no longer
work. Use `Table.count_rows` instead.
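As a before/after sketch (against a hypothetical `table` object):
```python
# Synchronous API
schema = table.schema      # property
num_rows = len(table)

# Asynchronous API
schema = await table.schema()        # now an async method
num_rows = await table.count_rows()  # __len__ was removed
```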
### Creating Indices
The `Table.create_index` method is now used for creating both vector indices
and scalar indices. It currently requires a column name to be specified (the
column to index). Vector index defaults are now smarter and scale better with
the size of the data.
To configure the details of an index you will need to specify which kind of
index you are using, as in the sketch below.
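A sketch of both cases (the `config` keyword and the option-class parameters
are assumptions based on the async API reference; exact names may differ):
```python
from lancedb.index import BTree, IvfPq

# Scalar (btree) index on a regular column
await table.create_index("id", config=BTree())

# Vector index with explicit configuration
await table.create_index("vector", config=IvfPq(num_partitions=256))
```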
### Querying
The `Table.search` method has been renamed to `AsyncTable.vector_search` for
clarity.
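So a query that used to be written with `search` becomes the following
(materializing results via `to_arrow` is an assumption here):
```python
# Synchronous API
results = table.search([0.1, 0.3]).limit(10).to_arrow()

# Asynchronous API
results = await table.vector_search([0.1, 0.3]).limit(10).to_arrow()
```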
## Features not yet supported
The following features are not yet supported by the asynchronous API. However,
we plan to support them soon.
- You cannot specify an embedding function when creating or opening a table.
  You must calculate embeddings yourself if using the asynchronous API.
- The merge insert operation is not supported in the asynchronous API.
- Cleanup / compact / optimize indices are not supported in the asynchronous API.
- Add / alter columns are not supported in the asynchronous API.
- The asynchronous API does not yet support full text search or reranking.
- Remote connections to LanceDB Cloud are not yet supported.
- The method `Table.head` is not yet supported.

View File

@@ -8,17 +8,20 @@ This section contains the API reference for the OSS Python API.
pip install lancedb
```
## Connection
The following methods describe the synchronous API client. There
is also an [asynchronous API client](#connections-asynchronous).
## Connections (Synchronous)
::: lancedb.connect
::: lancedb.db.DBConnection
## Table
## Tables (Synchronous)
::: lancedb.table.Table
## Querying
## Querying (Synchronous)
::: lancedb.query.Query
@@ -86,4 +89,42 @@ pip install lancedb
::: lancedb.rerankers.cross_encoder.CrossEncoderReranker
::: lancedb.rerankers.openai.OpenaiReranker
::: lancedb.rerankers.openai.OpenaiReranker
## Connections (Asynchronous)
Connections represent a connection to a LanceDB database and
can be used to create, list, or open tables.
::: lancedb.connect_async
::: lancedb.db.AsyncConnection
## Tables (Asynchronous)
Tables hold your actual data as a collection of records / rows.
::: lancedb.table.AsyncTable
## Indices (Asynchronous)
Indices can be created on a table to speed up queries. This section
lists the indices that LanceDB supports.
::: lancedb.index.BTree
::: lancedb.index.IvfPq
## Querying (Asynchronous)
Queries allow you to return data from your database. Basic queries can be
created with the [AsyncTable.query][lancedb.table.AsyncTable.query] method
to return the entire (typically filtered) table. Vector searches return the
rows nearest to a query vector and can be created with the
[AsyncTable.vector_search][lancedb.table.AsyncTable.vector_search] method.
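A minimal sketch of the two query styles (the `where` filter method is an
assumption borrowed from the synchronous API):
```python
# Plain (filtered) scan of the table
rows = await table.query().where("category = 'fruit'").limit(100).to_arrow()

# Nearest-neighbor search against a query vector
nearest = await table.vector_search([0.2, 0.1]).limit(5).to_arrow()
```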
::: lancedb.query.AsyncQueryBase
::: lancedb.query.AsyncQuery
::: lancedb.query.AsyncVectorQuery

56
node/package-lock.json generated
View File

@@ -1,12 +1,12 @@
{
"name": "vectordb",
"version": "0.4.13",
"version": "0.4.14",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "vectordb",
"version": "0.4.13",
"version": "0.4.14",
"cpu": [
"x64",
"arm64"
@@ -52,11 +52,11 @@
"uuid": "^9.0.0"
},
"optionalDependencies": {
"@lancedb/vectordb-darwin-arm64": "0.4.13",
"@lancedb/vectordb-darwin-x64": "0.4.13",
"@lancedb/vectordb-linux-arm64-gnu": "0.4.13",
"@lancedb/vectordb-linux-x64-gnu": "0.4.13",
"@lancedb/vectordb-win32-x64-msvc": "0.4.13"
"@lancedb/vectordb-darwin-arm64": "0.4.14",
"@lancedb/vectordb-darwin-x64": "0.4.14",
"@lancedb/vectordb-linux-arm64-gnu": "0.4.14",
"@lancedb/vectordb-linux-x64-gnu": "0.4.14",
"@lancedb/vectordb-win32-x64-msvc": "0.4.14"
},
"peerDependencies": {
"@apache-arrow/ts": "^14.0.2",
@@ -334,9 +334,9 @@
}
},
"node_modules/@lancedb/vectordb-darwin-arm64": {
"version": "0.4.13",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.4.13.tgz",
"integrity": "sha512-JfroNCG8yKIU931Y+x8d0Fp8C9DHUSC5j+CjI+e5err7rTWtie4j3JbsXlWAnPFaFEOg0Xk3BWkSikCvhPGJGg==",
"version": "0.4.14",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.4.14.tgz",
"integrity": "sha512-fw6mf6UhFf4j2kKdFcw0P+SOiIqmRbt+YQSgDbF4BFU3OUSW0XyfETIj9cUMQbSwPFsofhlGp5BRpCd7W9noew==",
"cpu": [
"arm64"
],
@@ -345,22 +345,10 @@
"darwin"
]
},
"node_modules/@lancedb/vectordb-darwin-x64": {
"version": "0.4.13",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.4.13.tgz",
"integrity": "sha512-dG6IMvfpHpnHdbJ0UffzJ7cZfMiC02MjIi6YJzgx+hKz2UNXWNBIfTvvhqli85mZsGRXL1OYDdYv0K1YzNjXlA==",
"cpu": [
"x64"
],
"optional": true,
"os": [
"darwin"
]
},
"node_modules/@lancedb/vectordb-linux-arm64-gnu": {
"version": "0.4.13",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.4.13.tgz",
"integrity": "sha512-BRR1VzaMviXby7qmLm0axNZM8eUZF3ZqfvnDKdVRpC3LaRueD6pMXHuC2IUKaFkn7xktf+8BlDZb6foFNEj8bQ==",
"version": "0.4.14",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.4.14.tgz",
"integrity": "sha512-1+LFI8vU+f/lnGy1s3XCySuV4oj3ZUW03xtmedGBW8nv/Y/jWXP0OYJCRI72eu+dLIdu0tCPsEiu8Hl+o02t9g==",
"cpu": [
"arm64"
],
@@ -369,22 +357,10 @@
"linux"
]
},
"node_modules/@lancedb/vectordb-linux-x64-gnu": {
"version": "0.4.13",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.4.13.tgz",
"integrity": "sha512-WnekZ7ZMlria+NODZ6aBCljCFQSe2bBNUS9ZpyFl/Y1vHduSQPuBxM6V7vp2QubC0daq/rifgjDob89DF+x3xw==",
"cpu": [
"x64"
],
"optional": true,
"os": [
"linux"
]
},
"node_modules/@lancedb/vectordb-win32-x64-msvc": {
"version": "0.4.13",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.4.13.tgz",
"integrity": "sha512-3NDpMWBL2ksDHXAraXhowiLqQcNWM5bdbeHwze4+InYMD54hyQ2ODNc+4usxp63Nya9biVnFS27yXULqkzIEqQ==",
"version": "0.4.14",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.4.14.tgz",
"integrity": "sha512-fpuNMZ4aHSpZC3ztp5a0Wh18N6DpCx5EPWhS7bGA5XulGc0l+sZAJHfHwalx76ys//0Ns1z7cuKJhZpSa4SrdQ==",
"cpu": [
"x64"
],

View File

@@ -1,6 +1,6 @@
{
"name": "vectordb",
"version": "0.4.13",
"version": "0.4.14",
"description": " Serverless, low-latency vector database for AI applications",
"main": "dist/index.js",
"types": "dist/index.d.ts",
@@ -88,10 +88,10 @@
}
},
"optionalDependencies": {
"@lancedb/vectordb-darwin-arm64": "0.4.13",
"@lancedb/vectordb-darwin-x64": "0.4.13",
"@lancedb/vectordb-linux-arm64-gnu": "0.4.13",
"@lancedb/vectordb-linux-x64-gnu": "0.4.13",
"@lancedb/vectordb-win32-x64-msvc": "0.4.13"
"@lancedb/vectordb-darwin-arm64": "0.4.14",
"@lancedb/vectordb-darwin-x64": "0.4.14",
"@lancedb/vectordb-linux-arm64-gnu": "0.4.14",
"@lancedb/vectordb-linux-x64-gnu": "0.4.14",
"@lancedb/vectordb-win32-x64-msvc": "0.4.14"
}
}

View File

@@ -24,6 +24,7 @@ import { RemoteConnection } from './remote'
import { Query } from './query'
import { isEmbeddingFunction } from './embedding/embedding_function'
import { type Literal, toSQL } from './util'
import { type HttpMiddleware } from './middleware'
const {
databaseNew,
@@ -302,6 +303,18 @@ export interface Connection {
* @param name The name of the table to drop.
*/
dropTable(name: string): Promise<void>
/**
* Instrument the behavior of this Connection with middleware.
*
* The middleware will be called in the order they are added.
*
* Currently this functionality is only supported for remote Connections.
*
* @param {HttpMiddleware} middleware - Middleware which will instrument the Connection.
* @returns - this Connection instrumented by the passed middleware
*/
withMiddleware(middleware: HttpMiddleware): Connection
}
/**
@@ -541,6 +554,18 @@ export interface Table<T = number[]> {
* names (e.g. "a").
*/
dropColumns(columnNames: string[]): Promise<void>
/**
* Instrument the behavior of this Table with middleware.
*
* The middleware will be called in the order they are added.
*
* Currently this functionality is only supported for remote tables.
*
* @param {HttpMiddleware} middleware - Middleware which will instrument the Table.
* @returns - this Table instrumented by the passed middleware
*/
withMiddleware(middleware: HttpMiddleware): Table<T>
}
/**
@@ -795,6 +820,10 @@ export class LocalConnection implements Connection {
async dropTable (name: string): Promise<void> {
await databaseDropTable.call(this._db, name)
}
withMiddleware (middleware: HttpMiddleware): Connection {
return this
}
}
export class LocalTable<T = number[]> implements Table<T> {
@@ -1105,6 +1134,10 @@ export class LocalTable<T = number[]> implements Table<T> {
async dropColumns (columnNames: string[]): Promise<void> {
return tableDropColumns.call(this._tbl, columnNames)
}
withMiddleware (middleware: HttpMiddleware): Table<T> {
return this
}
}
export interface CleanupStats {

58
node/src/middleware.ts Normal file
View File

@@ -0,0 +1,58 @@
// Copyright 2024 LanceDB Developers.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
/**
* Middleware for Remote LanceDB Connection or Table
*/
export interface HttpMiddleware {
/**
* A callback that can be used to instrument the behavior of HTTP requests to remote
* tables. It can be used to add headers, modify the request, or even short-circuit
* the request and return a response without making the request to the remote endpoint.
* It can also be used to modify the response from the remote endpoint.
*
* @param {RemoteRequest} req - Request to the remote endpoint
* @param {onRemoteRequestNext} next - Callback to advance the middleware chain
*/
onRemoteRequest(
req: RemoteRequest,
next: (req: RemoteRequest) => Promise<RemoteResponse>,
): Promise<RemoteResponse>
};
export enum Method {
GET,
POST
}
/**
* A LanceDB Remote HTTP Request
*/
export interface RemoteRequest {
uri: string
method: Method
headers: Map<string, string>
params?: Map<string, string>
body?: any
}
/**
* A LanceDB Remote HTTP Response
*/
export interface RemoteResponse {
status: number
statusText: string
headers: Map<string, string>
body: () => Promise<any>
}

View File

@@ -12,13 +12,101 @@
// See the License for the specific language governing permissions and
// limitations under the License.
import axios, { type AxiosResponse } from 'axios'
import axios, { type AxiosResponse, type ResponseType } from 'axios'
import { tableFromIPC, type Table as ArrowTable } from 'apache-arrow'
import { type RemoteResponse, type RemoteRequest, Method } from '../middleware'
interface HttpLancedbClientMiddleware {
onRemoteRequest(
req: RemoteRequest,
next: (req: RemoteRequest) => Promise<RemoteResponse>,
): Promise<RemoteResponse>
}
/**
* Invoke the middleware chain and at the end call the remote endpoint
*/
async function callWithMiddlewares (
req: RemoteRequest,
middlewares: HttpLancedbClientMiddleware[],
opts?: MiddlewareInvocationOptions
): Promise<RemoteResponse> {
async function call (
i: number,
req: RemoteRequest
): Promise<RemoteResponse> {
// if we have reached the end of the middleware chain, make the request
if (i > middlewares.length) {
const headers = Object.fromEntries(req.headers.entries())
const params = Object.fromEntries(req.params?.entries() ?? [])
const timeout = 10000
let res
if (req.method === Method.POST) {
res = await axios.post(
req.uri,
req.body,
{
headers,
params,
timeout,
responseType: opts?.responseType
}
)
} else {
res = await axios.get(
req.uri,
{
headers,
params,
timeout
}
)
}
return toLanceRes(res)
}
// call next middleware in chain
return await middlewares[i - 1].onRemoteRequest(
req,
async (req) => {
return await call(i + 1, req)
}
)
}
return await call(1, req)
}
interface MiddlewareInvocationOptions {
responseType?: ResponseType
}
/**
* Marshall the library response into a LanceDB response
*/
function toLanceRes (res: AxiosResponse): RemoteResponse {
const headers = new Map()
for (const h in res.headers) {
headers.set(h, res.headers[h])
}
return {
status: res.status,
statusText: res.statusText,
headers,
body: async () => {
return res.data
}
}
}
export class HttpLancedbClient {
private readonly _url: string
private readonly _apiKey: () => string
private readonly _middlewares: HttpLancedbClientMiddleware[]
public constructor (
url: string,
@@ -27,6 +115,7 @@ export class HttpLancedbClient {
) {
this._url = url
this._apiKey = () => apiKey
this._middlewares = []
}
get uri (): string {
@@ -43,74 +132,61 @@ export class HttpLancedbClient {
columns?: string[],
filter?: string
): Promise<ArrowTable<any>> {
const response = await axios.post(
`${this._url}/v1/table/${tableName}/query/`,
{
vector,
k,
nprobes,
refineFactor,
columns,
filter,
prefilter
},
{
headers: {
'Content-Type': 'application/json',
'x-api-key': this._apiKey(),
...(this._dbName !== undefined ? { 'x-lancedb-database': this._dbName } : {})
},
responseType: 'arraybuffer',
timeout: 10000
}
).catch((err) => {
console.error('error: ', err)
if (err.response === undefined) {
throw new Error(`Network Error: ${err.message as string}`)
}
return err.response
})
if (response.status !== 200) {
const errorData = new TextDecoder().decode(response.data)
throw new Error(
`Server Error, status: ${response.status as number}, ` +
`message: ${response.statusText as string}: ${errorData}`
)
}
const table = tableFromIPC(response.data)
const result = await this.post(
`/v1/table/${tableName}/query/`,
{
vector,
k,
nprobes,
refineFactor,
columns,
filter,
prefilter
},
undefined,
undefined,
'arraybuffer'
)
const table = tableFromIPC(await result.body())
return table
}
/**
* Send a GET request.
*/
public async get (path: string, params?: Record<string, string | number>): Promise<AxiosResponse> {
const response = await axios.get(
`${this._url}${path}`,
{
headers: {
'Content-Type': 'application/json',
'x-api-key': this._apiKey(),
...(this._dbName !== undefined ? { 'x-lancedb-database': this._dbName } : {})
},
params,
timeout: 10000
}
).catch((err) => {
public async get (path: string, params?: Record<string, string>): Promise<RemoteResponse> {
const req = {
uri: `${this._url}${path}`,
method: Method.GET,
headers: new Map(Object.entries({
'Content-Type': 'application/json',
'x-api-key': this._apiKey(),
...(this._dbName !== undefined ? { 'x-lancedb-database': this._dbName } : {})
})),
params: new Map(Object.entries(params ?? {}))
}
let response
try {
response = await callWithMiddlewares(req, this._middlewares)
return response
} catch (err: any) {
console.error('error: ', err)
if (err.response === undefined) {
throw new Error(`Network Error: ${err.message as string}`)
}
return err.response
})
response = toLanceRes(err.response)
}
if (response.status !== 200) {
const errorData = new TextDecoder().decode(response.data)
const errorData = new TextDecoder().decode(await response.body())
throw new Error(
`Server Error, status: ${response.status as number}, ` +
`message: ${response.statusText as string}: ${errorData}`
`Server Error, status: ${response.status}, ` +
`message: ${response.statusText}: ${errorData}`
)
}
return response
}
@@ -120,35 +196,65 @@ export class HttpLancedbClient {
public async post (
path: string,
data?: any,
params?: Record<string, string | number>,
content?: string | undefined
): Promise<AxiosResponse> {
const response = await axios.post(
`${this._url}${path}`,
data,
{
headers: {
'Content-Type': content ?? 'application/json',
'x-api-key': this._apiKey(),
...(this._dbName !== undefined ? { 'x-lancedb-database': this._dbName } : {})
},
params,
timeout: 30000
}
).catch((err) => {
params?: Record<string, string>,
content?: string | undefined,
responseType?: ResponseType | undefined
): Promise<RemoteResponse> {
const req = {
uri: `${this._url}${path}`,
method: Method.POST,
headers: new Map(Object.entries({
'Content-Type': content ?? 'application/json',
'x-api-key': this._apiKey(),
...(this._dbName !== undefined ? { 'x-lancedb-database': this._dbName } : {})
})),
params: new Map(Object.entries(params ?? {})),
body: data
}
let response
try {
response = await callWithMiddlewares(req, this._middlewares, { responseType })
} catch (err: any) {
console.error('error: ', err)
if (err.response === undefined) {
throw new Error(`Network Error: ${err.message as string}`)
}
return err.response
})
response = toLanceRes(err.response)
}
if (response.status !== 200) {
const errorData = new TextDecoder().decode(response.data)
const errorData = new TextDecoder().decode(await response.body())
throw new Error(
`Server Error, status: ${response.status as number}, ` +
`message: ${response.statusText as string}: ${errorData}`
`Server Error, status: ${response.status}, ` +
`message: ${response.statusText}: ${errorData}`
)
}
return response
}
/**
* Instrument this client with middleware
* @param mw - The middleware that instruments the client
* @returns - an instance of this client instrumented with the middleware
*/
public withMiddleware (mw: HttpLancedbClientMiddleware): HttpLancedbClient {
const wrapped = this.clone()
wrapped._middlewares.push(mw)
return wrapped
}
/**
* Make a clone of this client
*/
private clone (): HttpLancedbClient {
const clone = new HttpLancedbClient(this._url, this._apiKey(), this._dbName)
for (const mw of this._middlewares) {
clone._middlewares.push(mw)
}
return clone
}
}

View File

@@ -39,12 +39,13 @@ import {
fromTableToStreamBuffer
} from '../arrow'
import { toSQL } from '../util'
import { type HttpMiddleware } from '../middleware'
/**
* Remote connection.
*/
export class RemoteConnection implements Connection {
private readonly _client: HttpLancedbClient
private _client: HttpLancedbClient
private readonly _dbName: string
constructor (opts: ConnectionOptions) {
@@ -84,10 +85,11 @@ export class RemoteConnection implements Connection {
limit: number = 10
): Promise<string[]> {
const response = await this._client.get('/v1/table/', {
limit,
limit: `${limit}`,
page_token: pageToken
})
return response.data.tables
const body = await response.body()
return body.tables
}
async openTable (name: string): Promise<Table>
@@ -163,7 +165,7 @@ export class RemoteConnection implements Connection {
throw new Error(
`Server Error, status: ${res.status}, ` +
// eslint-disable-next-line @typescript-eslint/restrict-template-expressions
`message: ${res.statusText}: ${res.data}`
`message: ${res.statusText}: ${await res.body()}`
)
}
@@ -177,6 +179,17 @@ export class RemoteConnection implements Connection {
async dropTable (name: string): Promise<void> {
await this._client.post(`/v1/table/${name}/drop/`)
}
withMiddleware (middleware: HttpMiddleware): Connection {
const wrapped = this.clone()
wrapped._client = wrapped._client.withMiddleware(middleware)
return wrapped
}
private clone (): RemoteConnection {
const clone: RemoteConnection = Object.create(RemoteConnection.prototype)
return Object.assign(clone, this)
}
}
export class RemoteQuery<T = number[]> extends Query<T> {
@@ -229,7 +242,7 @@ export class RemoteQuery<T = number[]> extends Query<T> {
// we are using extend until we have next next version release
// Table and Connection has both been refactored to interfaces
export class RemoteTable<T = number[]> implements Table<T> {
private readonly _client: HttpLancedbClient
private _client: HttpLancedbClient
private readonly _embeddings?: EmbeddingFunction<T>
private readonly _name: string
@@ -256,15 +269,15 @@ export class RemoteTable<T = number[]> implements Table<T> {
get schema (): Promise<any> {
return this._client
.post(`/v1/table/${this._name}/describe/`)
.then((res) => {
.then(async (res) => {
if (res.status !== 200) {
throw new Error(
`Server Error, status: ${res.status}, ` +
// eslint-disable-next-line @typescript-eslint/restrict-template-expressions
`message: ${res.statusText}: ${res.data}`
`message: ${res.statusText}: ${await res.body()}`
)
}
return res.data?.schema
return (await res.body())?.schema
})
}
@@ -320,7 +333,7 @@ export class RemoteTable<T = number[]> implements Table<T> {
throw new Error(
`Server Error, status: ${res.status}, ` +
// eslint-disable-next-line @typescript-eslint/restrict-template-expressions
`message: ${res.statusText}: ${res.data}`
`message: ${res.statusText}: ${await res.body()}`
)
}
}
@@ -346,7 +359,7 @@ export class RemoteTable<T = number[]> implements Table<T> {
throw new Error(
`Server Error, status: ${res.status}, ` +
// eslint-disable-next-line @typescript-eslint/restrict-template-expressions
`message: ${res.statusText}: ${res.data}`
`message: ${res.statusText}: ${await res.body()}`
)
}
return tbl.numRows
@@ -372,7 +385,7 @@ export class RemoteTable<T = number[]> implements Table<T> {
throw new Error(
`Server Error, status: ${res.status}, ` +
// eslint-disable-next-line @typescript-eslint/restrict-template-expressions
`message: ${res.statusText}: ${res.data}`
`message: ${res.statusText}: ${await res.body()}`
)
}
return tbl.numRows
@@ -415,7 +428,7 @@ export class RemoteTable<T = number[]> implements Table<T> {
throw new Error(
`Server Error, status: ${res.status}, ` +
// eslint-disable-next-line @typescript-eslint/restrict-template-expressions
`message: ${res.statusText}: ${res.data}`
`message: ${res.statusText}: ${await res.body()}`
)
}
}
@@ -436,14 +449,14 @@ export class RemoteTable<T = number[]> implements Table<T> {
throw new Error(
`Server Error, status: ${res.status}, ` +
// eslint-disable-next-line @typescript-eslint/restrict-template-expressions
`message: ${res.statusText}: ${res.data}`
`message: ${res.statusText}: ${await res.body()}`
)
}
}
async countRows (): Promise<number> {
const result = await this._client.post(`/v1/table/${this._name}/describe/`)
return result.data?.stats?.num_rows
return (await result.body())?.stats?.num_rows
}
async delete (filter: string): Promise<void> {
@@ -476,7 +489,7 @@ export class RemoteTable<T = number[]> implements Table<T> {
const results = await this._client.post(
`/v1/table/${this._name}/index/list/`
)
return results.data.indexes?.map((index: any) => ({
return (await results.body()).indexes?.map((index: any) => ({
columns: index.columns,
name: index.index_name,
uuid: index.index_uuid
@@ -487,9 +500,10 @@ export class RemoteTable<T = number[]> implements Table<T> {
const results = await this._client.post(
`/v1/table/${this._name}/index/${indexUuid}/stats/`
)
const body = await results.body()
return {
numIndexedRows: results.data.num_indexed_rows,
numUnindexedRows: results.data.num_unindexed_rows
numIndexedRows: body?.num_indexed_rows,
numUnindexedRows: body?.num_unindexed_rows
}
}
@@ -504,4 +518,15 @@ export class RemoteTable<T = number[]> implements Table<T> {
async dropColumns (columnNames: string[]): Promise<void> {
throw new Error('Drop columns is not yet supported in LanceDB Cloud.')
}
withMiddleware(middleware: HttpMiddleware): Table<T> {
const wrapped = this.clone()
wrapped._client = wrapped._client.withMiddleware(middleware)
return wrapped
}
private clone (): RemoteTable<T> {
const clone: RemoteTable<T> = Object.create(RemoteTable.prototype)
return Object.assign(clone, this)
}
}

View File

@@ -106,6 +106,9 @@ export class MakeArrowTableOptions {
* An enhanced version of the {@link makeTable} function from Apache Arrow
* that supports nested fields and embeddings columns.
*
* (Typically you do not need to call this function; it will be called automatically
* when creating a table or adding data to it.)
*
* This function converts an array of Record<String, any> (row-major JS objects)
* to an Arrow Table (a columnar structure)
*

View File

@@ -0,0 +1,2 @@
export { EmbeddingFunction, isEmbeddingFunction } from "./embedding_function";
export { OpenAIEmbeddingFunction } from "./openai";

View File

@@ -18,9 +18,34 @@ import {
ConnectionOptions,
} from "./native.js";
export { ConnectionOptions, WriteOptions, Query } from "./native.js";
export { Connection, CreateTableOptions } from "./connection";
export { Table, AddDataOptions } from "./table";
export {
WriteOptions,
WriteMode,
AddColumnsSql,
ColumnAlteration,
ConnectionOptions,
} from "./native.js";
export {
makeArrowTable,
MakeArrowTableOptions,
Data,
VectorColumnOptions,
} from "./arrow";
export {
Connection,
CreateTableOptions,
TableNamesOptions,
} from "./connection";
export {
ExecutableQuery,
Query,
QueryBase,
VectorQuery,
RecordBatchIterator,
} from "./query";
export { Index, IndexOptions, IvfPqOptions } from "./indices";
export { Table, AddDataOptions, IndexConfig, UpdateOptions } from "./table";
export * as embedding from "./embedding";
/**
* Connect to a LanceDB instance at the given URI.

View File

@@ -1,147 +0,0 @@
/* tslint:disable */
/* eslint-disable */
/* auto-generated by NAPI-RS */
/** A description of an index currently configured on a column */
export interface IndexConfig {
/** The type of the index */
indexType: string
/**
* The columns in the index
*
* Currently this is always an array of size 1. In the future there may
* be more columns to represent composite indices.
*/
columns: Array<string>
}
/**
* A definition of a column alteration. The alteration changes the column at
* `path` to have the new name `name`, to be nullable if `nullable` is true,
* and to have the data type `data_type`. At least one of `rename` or `nullable`
* must be provided.
*/
export interface ColumnAlteration {
/**
* The path to the column to alter. This is a dot-separated path to the column.
* If it is a top-level column then it is just the name of the column. If it is
* a nested column then it is the path to the column, e.g. "a.b.c" for a column
* `c` nested inside a column `b` nested inside a column `a`.
*/
path: string
/**
* The new name of the column. If not provided then the name will not be changed.
* This must be distinct from the names of all other columns in the table.
*/
rename?: string
/** Set the new nullability. Note that a nullable column cannot be made non-nullable. */
nullable?: boolean
}
/** A definition of a new column to add to a table. */
export interface AddColumnsSql {
/** The name of the new column. */
name: string
/**
* The values to populate the new column with, as a SQL expression.
* The expression can reference other columns in the table.
*/
valueSql: string
}
export interface ConnectionOptions {
apiKey?: string
hostOverride?: string
/**
* (For LanceDB OSS only): The interval, in seconds, at which to check for
* updates to the table from other processes. If None, then consistency is not
* checked. For performance reasons, this is the default. For strong
* consistency, set this to zero seconds. Then every read will check for
* updates from other processes. As a compromise, you can set this to a
* non-zero value for eventual consistency. If more than that interval
* has passed since the last check, then the table will be checked for updates.
* Note: this consistency only applies to read operations. Write operations are
* always consistent.
*/
readConsistencyInterval?: number
}
/** Write mode for writing a table. */
export const enum WriteMode {
Create = 'Create',
Append = 'Append',
Overwrite = 'Overwrite'
}
/** Write options when creating a Table. */
export interface WriteOptions {
mode?: WriteMode
}
export function connect(uri: string, options: ConnectionOptions): Promise<Connection>
export class Connection {
/** Create a new Connection instance from the given URI. */
static new(uri: string, options: ConnectionOptions): Promise<Connection>
display(): string
isOpen(): boolean
close(): void
/** List all tables in the dataset. */
tableNames(startAfter?: string | undefined | null, limit?: number | undefined | null): Promise<Array<string>>
/**
* Create table from a Apache Arrow IPC (file) buffer.
*
* Parameters:
* - name: The name of the table.
* - buf: The buffer containing the IPC file.
*
*/
createTable(name: string, buf: Buffer, mode: string): Promise<Table>
createEmptyTable(name: string, schemaBuf: Buffer, mode: string): Promise<Table>
openTable(name: string): Promise<Table>
/** Drop table with the name. Or raise an error if the table does not exist. */
dropTable(name: string): Promise<void>
}
export class Index {
static ivfPq(distanceType?: string | undefined | null, numPartitions?: number | undefined | null, numSubVectors?: number | undefined | null, maxIterations?: number | undefined | null, sampleRate?: number | undefined | null): Index
static btree(): Index
}
/** Typescript-style Async Iterator over RecordBatches */
export class RecordBatchIterator {
next(): Promise<Buffer | null>
}
export class Query {
onlyIf(predicate: string): void
select(columns: Array<[string, string]>): void
limit(limit: number): void
nearestTo(vector: Float32Array): VectorQuery
execute(): Promise<RecordBatchIterator>
}
export class VectorQuery {
column(column: string): void
distanceType(distanceType: string): void
postfilter(): void
refineFactor(refineFactor: number): void
nprobes(nprobe: number): void
bypassVectorIndex(): void
onlyIf(predicate: string): void
select(columns: Array<[string, string]>): void
limit(limit: number): void
execute(): Promise<RecordBatchIterator>
}
export class Table {
display(): string
isOpen(): boolean
close(): void
/** Return Schema as empty Arrow IPC file. */
schema(): Promise<Buffer>
add(buf: Buffer, mode: string): Promise<void>
countRows(filter?: string | undefined | null): Promise<number>
delete(predicate: string): Promise<void>
createIndex(index: Index | undefined | null, column: string, replace?: boolean | undefined | null): Promise<void>
update(onlyIf: string | undefined | null, columns: Array<[string, string]>): Promise<void>
query(): Query
vectorSearch(vector: Float32Array): VectorQuery
addColumns(transforms: Array<AddColumnsSql>): Promise<void>
alterColumns(alterations: Array<ColumnAlteration>): Promise<void>
dropColumns(columns: Array<string>): Promise<void>
version(): Promise<number>
checkout(version: number): Promise<void>
checkoutLatest(): Promise<void>
restore(): Promise<void>
listIndices(): Promise<Array<IndexConfig>>
}

View File

@@ -1,329 +0,0 @@
/* tslint:disable */
/* eslint-disable */
/* prettier-ignore */
/* auto-generated by NAPI-RS */
const { existsSync, readFileSync } = require('fs')
const { join } = require("path");
const { platform, arch } = process;
let nativeBinding = null;
let localFileExisted = false;
let loadError = null;
function isMusl() {
// For Node 10
if (!process.report || typeof process.report.getReport !== "function") {
try {
const lddPath = require("child_process")
.execSync("which ldd")
.toString()
.trim();
return readFileSync(lddPath, "utf8").includes("musl");
} catch (e) {
return true;
}
} else {
const { glibcVersionRuntime } = process.report.getReport().header;
return !glibcVersionRuntime;
}
}
switch (platform) {
case "android":
switch (arch) {
case "arm64":
localFileExisted = existsSync(
join(__dirname, "lancedb-nodejs.android-arm64.node"),
);
try {
if (localFileExisted) {
nativeBinding = require("./lancedb-nodejs.android-arm64.node");
} else {
nativeBinding = require("lancedb-android-arm64");
}
} catch (e) {
loadError = e;
}
break;
case "arm":
localFileExisted = existsSync(
join(__dirname, "lancedb-nodejs.android-arm-eabi.node"),
);
try {
if (localFileExisted) {
nativeBinding = require("./lancedb-nodejs.android-arm-eabi.node");
} else {
nativeBinding = require("lancedb-android-arm-eabi");
}
} catch (e) {
loadError = e;
}
break;
default:
throw new Error(`Unsupported architecture on Android ${arch}`);
}
break;
case "win32":
switch (arch) {
case "x64":
localFileExisted = existsSync(
join(__dirname, "lancedb-nodejs.win32-x64-msvc.node"),
);
try {
if (localFileExisted) {
nativeBinding = require("./lancedb-nodejs.win32-x64-msvc.node");
} else {
nativeBinding = require("lancedb-win32-x64-msvc");
}
} catch (e) {
loadError = e;
}
break;
case "ia32":
localFileExisted = existsSync(
join(__dirname, "lancedb-nodejs.win32-ia32-msvc.node"),
);
try {
if (localFileExisted) {
nativeBinding = require("./lancedb-nodejs.win32-ia32-msvc.node");
} else {
nativeBinding = require("lancedb-win32-ia32-msvc");
}
} catch (e) {
loadError = e;
}
break;
case "arm64":
localFileExisted = existsSync(
join(__dirname, "lancedb-nodejs.win32-arm64-msvc.node"),
);
try {
if (localFileExisted) {
nativeBinding = require("./lancedb-nodejs.win32-arm64-msvc.node");
} else {
nativeBinding = require("lancedb-win32-arm64-msvc");
}
} catch (e) {
loadError = e;
}
break;
default:
throw new Error(`Unsupported architecture on Windows: ${arch}`);
}
break;
case "darwin":
localFileExisted = existsSync(
join(__dirname, "lancedb-nodejs.darwin-universal.node"),
);
try {
if (localFileExisted) {
nativeBinding = require("./lancedb-nodejs.darwin-universal.node");
} else {
nativeBinding = require("lancedb-darwin-universal");
}
break;
} catch {}
switch (arch) {
case "x64":
localFileExisted = existsSync(
join(__dirname, "lancedb-nodejs.darwin-x64.node"),
);
try {
if (localFileExisted) {
nativeBinding = require("./lancedb-nodejs.darwin-x64.node");
} else {
nativeBinding = require("lancedb-darwin-x64");
}
} catch (e) {
loadError = e;
}
break;
case "arm64":
localFileExisted = existsSync(
join(__dirname, "lancedb-nodejs.darwin-arm64.node"),
);
try {
if (localFileExisted) {
nativeBinding = require("./lancedb-nodejs.darwin-arm64.node");
} else {
nativeBinding = require("lancedb-darwin-arm64");
}
} catch (e) {
loadError = e;
}
break;
default:
throw new Error(`Unsupported architecture on macOS: ${arch}`);
}
break;
case "freebsd":
if (arch !== "x64") {
throw new Error(`Unsupported architecture on FreeBSD: ${arch}`);
}
localFileExisted = existsSync(
join(__dirname, "lancedb-nodejs.freebsd-x64.node"),
);
try {
if (localFileExisted) {
nativeBinding = require("./lancedb-nodejs.freebsd-x64.node");
} else {
nativeBinding = require("lancedb-freebsd-x64");
}
} catch (e) {
loadError = e;
}
break;
case "linux":
switch (arch) {
case "x64":
if (isMusl()) {
localFileExisted = existsSync(
join(__dirname, "lancedb-nodejs.linux-x64-musl.node"),
);
try {
if (localFileExisted) {
nativeBinding = require("./lancedb-nodejs.linux-x64-musl.node");
} else {
nativeBinding = require("lancedb-linux-x64-musl");
}
} catch (e) {
loadError = e;
}
} else {
localFileExisted = existsSync(
join(__dirname, "lancedb-nodejs.linux-x64-gnu.node"),
);
try {
if (localFileExisted) {
nativeBinding = require("./lancedb-nodejs.linux-x64-gnu.node");
} else {
nativeBinding = require("lancedb-linux-x64-gnu");
}
} catch (e) {
loadError = e;
}
}
break;
case "arm64":
if (isMusl()) {
localFileExisted = existsSync(
join(__dirname, "lancedb-nodejs.linux-arm64-musl.node"),
);
try {
if (localFileExisted) {
nativeBinding = require("./lancedb-nodejs.linux-arm64-musl.node");
} else {
nativeBinding = require("lancedb-linux-arm64-musl");
}
} catch (e) {
loadError = e;
}
} else {
localFileExisted = existsSync(
join(__dirname, "lancedb-nodejs.linux-arm64-gnu.node"),
);
try {
if (localFileExisted) {
nativeBinding = require("./lancedb-nodejs.linux-arm64-gnu.node");
} else {
nativeBinding = require("lancedb-linux-arm64-gnu");
}
} catch (e) {
loadError = e;
}
}
break;
case "arm":
localFileExisted = existsSync(
join(__dirname, "lancedb-nodejs.linux-arm-gnueabihf.node"),
);
try {
if (localFileExisted) {
nativeBinding = require("./lancedb-nodejs.linux-arm-gnueabihf.node");
} else {
nativeBinding = require("lancedb-linux-arm-gnueabihf");
}
} catch (e) {
loadError = e;
}
break;
case "riscv64":
if (isMusl()) {
localFileExisted = existsSync(
join(__dirname, "lancedb-nodejs.linux-riscv64-musl.node"),
);
try {
if (localFileExisted) {
nativeBinding = require("./lancedb-nodejs.linux-riscv64-musl.node");
} else {
nativeBinding = require("lancedb-linux-riscv64-musl");
}
} catch (e) {
loadError = e;
}
} else {
localFileExisted = existsSync(
join(__dirname, "lancedb-nodejs.linux-riscv64-gnu.node"),
);
try {
if (localFileExisted) {
nativeBinding = require("./lancedb-nodejs.linux-riscv64-gnu.node");
} else {
nativeBinding = require("lancedb-linux-riscv64-gnu");
}
} catch (e) {
loadError = e;
}
}
break;
case "s390x":
localFileExisted = existsSync(
join(__dirname, "lancedb-nodejs.linux-s390x-gnu.node"),
);
try {
if (localFileExisted) {
nativeBinding = require("./lancedb-nodejs.linux-s390x-gnu.node");
} else {
nativeBinding = require("lancedb-linux-s390x-gnu");
}
} catch (e) {
loadError = e;
}
break;
default:
throw new Error(`Unsupported architecture on Linux: ${arch}`);
}
break;
default:
throw new Error(`Unsupported OS: ${platform}, architecture: ${arch}`);
}
if (!nativeBinding) {
if (loadError) {
throw loadError;
}
throw new Error(`Failed to load native binding`);
}
const {
Connection,
Index,
RecordBatchIterator,
Query,
VectorQuery,
Table,
WriteMode,
connect,
} = nativeBinding;
module.exports.Connection = Connection;
module.exports.Index = Index;
module.exports.RecordBatchIterator = RecordBatchIterator;
module.exports.Query = Query;
module.exports.VectorQuery = VectorQuery;
module.exports.Table = Table;
module.exports.WriteMode = WriteMode;
module.exports.connect = connect;

View File

@@ -20,7 +20,7 @@ import {
VectorQuery as NativeVectorQuery,
} from "./native";
import { type IvfPqOptions } from "./indices";
class RecordBatchIterator implements AsyncIterator<RecordBatch> {
export class RecordBatchIterator implements AsyncIterator<RecordBatch> {
private promisedInner?: Promise<NativeBatchIterator>;
private inner?: NativeBatchIterator;

View File

@@ -1,3 +1,3 @@
# `lancedb-darwin-arm64`
# `@lancedb/lancedb-darwin-arm64`
This is the **aarch64-apple-darwin** binary for `lancedb`
This is the **aarch64-apple-darwin** binary for `@lancedb/lancedb`

View File

@@ -1,5 +1,5 @@
{
"name": "lancedb-darwin-arm64",
"name": "@lancedb/lancedb-darwin-arm64",
"version": "0.4.3",
"os": [
"darwin"
@@ -11,7 +11,7 @@
"files": [
"lancedb.darwin-arm64.node"
],
"license": "MIT",
"license": "Apache 2.0",
"engines": {
"node": ">= 18"
}

View File

@@ -1,3 +1,3 @@
# `lancedb-darwin-x64`
# `@lancedb/lancedb-darwin-x64`
This is the **x86_64-apple-darwin** binary for `lancedb`
This is the **x86_64-apple-darwin** binary for `@lancedb/lancedb`

View File

@@ -1,5 +1,5 @@
{
"name": "lancedb-darwin-x64",
"name": "@lancedb/lancedb-darwin-x64",
"version": "0.4.3",
"os": [
"darwin"
@@ -11,7 +11,7 @@
"files": [
"lancedb.darwin-x64.node"
],
"license": "MIT",
"license": "Apache 2.0",
"engines": {
"node": ">= 18"
}

View File

@@ -1,3 +1,3 @@
# `lancedb-linux-arm64-gnu`
# `@lancedb/lancedb-linux-arm64-gnu`
This is the **aarch64-unknown-linux-gnu** binary for `lancedb`
This is the **aarch64-unknown-linux-gnu** binary for `@lancedb/lancedb`

View File

@@ -1,5 +1,5 @@
{
"name": "lancedb-linux-arm64-gnu",
"name": "@lancedb/lancedb-linux-arm64-gnu",
"version": "0.4.3",
"os": [
"linux"
@@ -11,9 +11,9 @@
"files": [
"lancedb.linux-arm64-gnu.node"
],
"license": "MIT",
"license": "Apache 2.0",
"engines": {
"node": ">= 10"
"node": ">= 18"
},
"libc": [
"glibc"

View File

@@ -1,3 +1,3 @@
# `lancedb-linux-x64-gnu`
# `@lancedb/lancedb-linux-x64-gnu`
This is the **x86_64-unknown-linux-gnu** binary for `lancedb`
This is the **x86_64-unknown-linux-gnu** binary for `@lancedb/lancedb`

View File

@@ -1,5 +1,5 @@
{
"name": "lancedb-linux-x64-gnu",
"name": "@lancedb/lancedb-linux-x64-gnu",
"version": "0.4.3",
"os": [
"linux"
@@ -11,9 +11,9 @@
"files": [
"lancedb.linux-x64-gnu.node"
],
"license": "MIT",
"license": "Apache 2.0",
"engines": {
"node": ">= 10"
"node": ">= 18"
},
"libc": [
"glibc"

View File

@@ -0,0 +1,3 @@
# `@lancedb/lancedb-win32-x64-msvc`
This is the **x86_64-pc-windows-msvc** binary for `@lancedb/lancedb`

View File

@@ -0,0 +1,18 @@
{
"name": "@lancedb/lancedb-win32-x64-msvc",
"version": "0.4.3",
"os": [
"win32"
],
"cpu": [
"x64"
],
"main": "lancedb.win32-x64-msvc.node",
"files": [
"lancedb.win32-x64-msvc.node"
],
"license": "Apache 2.0",
"engines": {
"node": ">= 18"
}
}

198
nodejs/package-lock.json generated
View File

@@ -1,11 +1,11 @@
{
"name": "lancedb",
"name": "@lancedb/lancedb",
"version": "0.4.3",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "lancedb",
"name": "@lancedb/lancedb",
"version": "0.4.3",
"cpu": [
"x64",
@@ -15,8 +15,12 @@
"os": [
"darwin",
"linux",
"windows"
"win32"
],
"dependencies": {
"apache-arrow": "^15.0.0",
"openai": "^4.29.2"
},
"devDependencies": {
"@napi-rs/cli": "^2.18.0",
"@types/jest": "^29.1.2",
@@ -29,6 +33,7 @@
"eslint-plugin-jsdoc": "^48.2.1",
"jest": "^29.7.0",
"prettier": "^3.1.0",
"shx": "^0.3.4",
"tmp": "^0.2.3",
"ts-jest": "^29.1.2",
"typedoc": "^0.25.7",
@@ -40,14 +45,11 @@
"node": ">= 18"
},
"optionalDependencies": {
"lancedb-darwin-arm64": "0.4.3",
"lancedb-darwin-x64": "0.4.3",
"lancedb-linux-arm64-gnu": "0.4.3",
"lancedb-linux-x64-gnu": "0.4.3",
"openai": "^4.28.4"
},
"peerDependencies": {
"apache-arrow": "^15.0.0"
"@lancedb/lancedb-darwin-arm64": "0.4.3",
"@lancedb/lancedb-darwin-x64": "0.4.3",
"@lancedb/lancedb-linux-arm64-gnu": "0.4.3",
"@lancedb/lancedb-linux-x64-gnu": "0.4.3",
"@lancedb/lancedb-win32-x64-msvc": "0.4.3"
}
},
"node_modules/@75lb/deep-merge": {
@@ -1317,6 +1319,66 @@
"@jridgewell/sourcemap-codec": "^1.4.14"
}
},
"node_modules/@lancedb/lancedb-darwin-arm64": {
"version": "0.4.3",
"resolved": "https://registry.npmjs.org/@lancedb/lancedb-darwin-arm64/-/lancedb-darwin-arm64-0.4.3.tgz",
"integrity": "sha512-+kxuWUK9vtLBbjFMkIKeQ32kxK2tgvZRCQaU1I3RJ3+dLmDIVeIj+KJSlMelkKa2QC4JoyHQi9Ty1PdS2DojmQ==",
"cpu": [
"arm64"
],
"optional": true,
"os": [
"darwin"
],
"engines": {
"node": ">= 18"
}
},
"node_modules/@lancedb/lancedb-darwin-x64": {
"version": "0.4.3",
"resolved": "https://registry.npmjs.org/@lancedb/lancedb-darwin-x64/-/lancedb-darwin-x64-0.4.3.tgz",
"integrity": "sha512-JYvsSYxTOa/7OMojulz9h0gN2FwvypG/6l6dpLkViZ5LDvRcfVyDTzOLcOJkFn+db4TKeBOVyMWnnpDKaB+jLA==",
"cpu": [
"x64"
],
"optional": true,
"os": [
"darwin"
],
"engines": {
"node": ">= 18"
}
},
"node_modules/@lancedb/lancedb-linux-x64-gnu": {
"version": "0.4.3",
"resolved": "https://registry.npmjs.org/@lancedb/lancedb-linux-x64-gnu/-/lancedb-linux-x64-gnu-0.4.3.tgz",
"integrity": "sha512-jDANHchWNGmu1wfAyBk0apoFlLxtJ7FRc31pAQ3tKE4fwlgG7bUcaTX6s5C3vMNWXnyQLQtVuWZNXi2nVj879g==",
"cpu": [
"x64"
],
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">= 18"
}
},
"node_modules/@lancedb/lancedb-win32-x64-msvc": {
"version": "0.4.3",
"resolved": "https://registry.npmjs.org/@lancedb/lancedb-win32-x64-msvc/-/lancedb-win32-x64-msvc-0.4.3.tgz",
"integrity": "sha512-qADveXyv4YzllIbOOq8soqFfL7p7I35uhrD3PcTvj4Qxuo6q7pgQWQz2Mt3kGBpyPkH2yE4wWAGJhayShLRbiQ==",
"cpu": [
"x64"
],
"optional": true,
"os": [
"win32"
],
"engines": {
"node": ">= 18"
}
},
"node_modules/@napi-rs/cli": {
"version": "2.18.0",
"resolved": "https://registry.npmjs.org/@napi-rs/cli/-/cli-2.18.0.tgz",
@@ -1396,7 +1458,6 @@
"version": "0.5.6",
"resolved": "https://registry.npmjs.org/@swc/helpers/-/helpers-0.5.6.tgz",
"integrity": "sha512-aYX01Ke9hunpoCexYAgQucEpARGQ5w/cqHFrIR+e9gdKb1QWTsVJuTJ2ozQzIAxLyRQe/m+2RqzkyOOGiMKRQA==",
"peer": true,
"dependencies": {
"tslib": "^2.4.0"
}
@@ -1445,8 +1506,7 @@
"node_modules/@types/command-line-args": {
"version": "5.2.3",
"resolved": "https://registry.npmjs.org/@types/command-line-args/-/command-line-args-5.2.3.tgz",
"integrity": "sha512-uv0aG6R0Y8WHZLTamZwtfsDLVRnOa+n+n5rEvFWL5Na5gZ8V2Teab/duDPFzIIIhs9qizDpcavCusCLJZu62Kw==",
"peer": true
"integrity": "sha512-uv0aG6R0Y8WHZLTamZwtfsDLVRnOa+n+n5rEvFWL5Na5gZ8V2Teab/duDPFzIIIhs9qizDpcavCusCLJZu62Kw=="
},
"node_modules/@types/command-line-usage": {
"version": "5.0.2",
@@ -1514,7 +1574,6 @@
"version": "2.6.11",
"resolved": "https://registry.npmjs.org/@types/node-fetch/-/node-fetch-2.6.11.tgz",
"integrity": "sha512-24xFj9R5+rfQJLRyM56qh+wnVSYhyXC2tkoBndtY0U+vubqNsYXGjufB2nn8Q6gt0LrARwL6UBtMCSVCwl4B1g==",
"optional": true,
"dependencies": {
"@types/node": "*",
"form-data": "^4.0.0"
@@ -1783,7 +1842,6 @@
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/abort-controller/-/abort-controller-3.0.0.tgz",
"integrity": "sha512-h8lQ8tacZYnR3vNQTgibj+tODHI5/+l06Au2Pcriv/Gmet0eaj4TwWH41sO9wnHDiQsEj19q0drzdWdeAHtweg==",
"optional": true,
"dependencies": {
"event-target-shim": "^5.0.0"
},
@@ -1816,7 +1874,6 @@
"version": "4.5.0",
"resolved": "https://registry.npmjs.org/agentkeepalive/-/agentkeepalive-4.5.0.tgz",
"integrity": "sha512-5GG/5IbQQpC9FpkRGsSvZI5QYeSCzlJHdpBQntCsuTOxhKD8lqKhrleg2Yi7yvMIf82Ycmmqln9U8V9qwEiJew==",
"optional": true,
"dependencies": {
"humanize-ms": "^1.2.1"
},
@@ -1913,7 +1970,6 @@
"version": "15.0.0",
"resolved": "https://registry.npmjs.org/apache-arrow/-/apache-arrow-15.0.0.tgz",
"integrity": "sha512-e6aunxNKM+woQf137ny3tp/xbLjFJS2oGQxQhYGqW6dGeIwNV1jOeEAeR6sS2jwAI2qLO83gYIP2MBz02Gw5Xw==",
"peer": true,
"dependencies": {
"@swc/helpers": "^0.5.2",
"@types/command-line-args": "^5.2.1",
@@ -2001,8 +2057,7 @@
"node_modules/asynckit": {
"version": "0.4.0",
"resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz",
"integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==",
"optional": true
"integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q=="
},
"node_modules/babel-jest": {
"version": "29.7.0",
@@ -2129,8 +2184,7 @@
"node_modules/base-64": {
"version": "0.1.0",
"resolved": "https://registry.npmjs.org/base-64/-/base-64-0.1.0.tgz",
"integrity": "sha512-Y5gU45svrR5tI2Vt/X9GPd3L0HNIKzGu202EjxrXMpuc2V2CiKgemAbUUsqYmZJvPtCXoUKjNZwBJzsNScUbXA==",
"optional": true
"integrity": "sha512-Y5gU45svrR5tI2Vt/X9GPd3L0HNIKzGu202EjxrXMpuc2V2CiKgemAbUUsqYmZJvPtCXoUKjNZwBJzsNScUbXA=="
},
"node_modules/brace-expansion": {
"version": "1.1.11",
@@ -2296,7 +2350,6 @@
"version": "0.0.2",
"resolved": "https://registry.npmjs.org/charenc/-/charenc-0.0.2.tgz",
"integrity": "sha512-yrLQ/yVUFXkzg7EDQsPieE/53+0RlaWTs+wBrvW36cyilJ2SaDWfl4Yj7MtLTXleV9uEKefbAGUPv2/iWSooRA==",
"optional": true,
"engines": {
"node": "*"
}
@@ -2357,7 +2410,6 @@
"version": "1.0.8",
"resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz",
"integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==",
"optional": true,
"dependencies": {
"delayed-stream": "~1.0.0"
},
@@ -2469,7 +2521,6 @@
"version": "0.0.2",
"resolved": "https://registry.npmjs.org/crypt/-/crypt-0.0.2.tgz",
"integrity": "sha512-mCxBlsHFYh9C+HVpiEacem8FEBnMXgU9gy4zmNC+SXAZNB/1idgp/aulFJ4FgCi7GPEVbfyng092GqL2k2rmow==",
"optional": true,
"engines": {
"node": "*"
}
@@ -2530,7 +2581,6 @@
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz",
"integrity": "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==",
"optional": true,
"engines": {
"node": ">=0.4.0"
}
@@ -2557,7 +2607,6 @@
"version": "1.3.0",
"resolved": "https://registry.npmjs.org/digest-fetch/-/digest-fetch-1.3.0.tgz",
"integrity": "sha512-CGJuv6iKNM7QyZlM2T3sPAdZWd/p9zQiRNS9G+9COUCwzWFTs0Xp8NF5iePx7wtvhDykReiRRrSeNb4oMmB8lA==",
"optional": true,
"dependencies": {
"base-64": "^0.1.0",
"md5": "^2.3.0"
@@ -2862,7 +2911,6 @@
"version": "5.0.1",
"resolved": "https://registry.npmjs.org/event-target-shim/-/event-target-shim-5.0.1.tgz",
"integrity": "sha512-i/2XbnSz/uxRCU6+NdVJgKWDTM427+MqYbkQzD321DuCQJUqOuJKIA0IM2+W2xtYHdKOmZ4dR6fExsd4SXL+WQ==",
"optional": true,
"engines": {
"node": ">=6"
}
@@ -3024,7 +3072,6 @@
"version": "4.0.0",
"resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.0.tgz",
"integrity": "sha512-ETEklSGi5t0QMZuiXoA/Q6vcnxcLQP5vdugSpuAyi6SVGi2clPPp+xgEhuMaHC+zGgn31Kd235W35f7Hykkaww==",
"optional": true,
"dependencies": {
"asynckit": "^0.4.0",
"combined-stream": "^1.0.8",
@@ -3037,14 +3084,12 @@
"node_modules/form-data-encoder": {
"version": "1.7.2",
"resolved": "https://registry.npmjs.org/form-data-encoder/-/form-data-encoder-1.7.2.tgz",
"integrity": "sha512-qfqtYan3rxrnCk1VYaA4H+Ms9xdpPqvLZa6xmMgFvhO32x7/3J/ExcTd6qpxM0vH2GdMI+poehyBZvqfMTto8A==",
"optional": true
"integrity": "sha512-qfqtYan3rxrnCk1VYaA4H+Ms9xdpPqvLZa6xmMgFvhO32x7/3J/ExcTd6qpxM0vH2GdMI+poehyBZvqfMTto8A=="
},
"node_modules/formdata-node": {
"version": "4.4.1",
"resolved": "https://registry.npmjs.org/formdata-node/-/formdata-node-4.4.1.tgz",
"integrity": "sha512-0iirZp3uVDjVGt9p49aTaqjk84TrglENEDuqfdlZQ1roC9CWlPk6Avf8EEnZNcAqPonwkG35x4n3ww/1THYAeQ==",
"optional": true,
"dependencies": {
"node-domexception": "1.0.0",
"web-streams-polyfill": "4.0.0-beta.3"
@@ -3057,7 +3102,6 @@
"version": "4.0.0-beta.3",
"resolved": "https://registry.npmjs.org/web-streams-polyfill/-/web-streams-polyfill-4.0.0-beta.3.tgz",
"integrity": "sha512-QW95TCTaHmsYfHDybGMwO5IJIM93I/6vTRk+daHTWFPhwh+C8Cg7j7XyKrwrj8Ib6vYXe0ocYNrmzY4xAAN6ug==",
"optional": true,
"engines": {
"node": ">= 14"
}
@@ -3272,7 +3316,6 @@
"version": "1.2.1",
"resolved": "https://registry.npmjs.org/humanize-ms/-/humanize-ms-1.2.1.tgz",
"integrity": "sha512-Fl70vYtsAFb/C06PTS9dZBo7ihau+Tu/DNCk/OyHhea07S+aeMWpFFkUaXRa8fI+ScZbEI8dfSxwY7gxZ9SAVQ==",
"optional": true,
"dependencies": {
"ms": "^2.0.0"
}
@@ -3355,6 +3398,15 @@
"integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==",
"dev": true
},
"node_modules/interpret": {
"version": "1.4.0",
"resolved": "https://registry.npmjs.org/interpret/-/interpret-1.4.0.tgz",
"integrity": "sha512-agE4QfB2Lkp9uICn7BAqoscw4SZP9kTE2hxiFI3jBPmXJfdqiahTbUuKGsMoN2GtqL9AxhYioAcVvgsb1HvRbA==",
"dev": true,
"engines": {
"node": ">= 0.10"
}
},
"node_modules/is-arrayish": {
"version": "0.2.1",
"resolved": "https://registry.npmjs.org/is-arrayish/-/is-arrayish-0.2.1.tgz",
@@ -3364,8 +3416,7 @@
"node_modules/is-buffer": {
"version": "1.1.6",
"resolved": "https://registry.npmjs.org/is-buffer/-/is-buffer-1.1.6.tgz",
"integrity": "sha512-NcdALwpXkTm5Zvvbk7owOUSvVvBKDgKP5/ewfXEznmQFfs4ZRmanOeKBTjRVjka3QFoN6XJ+9F3USqfHqTaU5w==",
"optional": true
"integrity": "sha512-NcdALwpXkTm5Zvvbk7owOUSvVvBKDgKP5/ewfXEznmQFfs4ZRmanOeKBTjRVjka3QFoN6XJ+9F3USqfHqTaU5w=="
},
"node_modules/is-builtin-module": {
"version": "3.2.1",
@@ -4458,7 +4509,6 @@
"version": "2.3.0",
"resolved": "https://registry.npmjs.org/md5/-/md5-2.3.0.tgz",
"integrity": "sha512-T1GITYmFaKuO91vxyoQMFETst+O71VUPEU3ze5GNzDm0OWdP8v1ziTaAEPUr/3kLsY3Sftgz242A1SetQiDL7g==",
"optional": true,
"dependencies": {
"charenc": "0.0.2",
"crypt": "0.0.2",
@@ -4497,7 +4547,6 @@
"version": "1.52.0",
"resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz",
"integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==",
"optional": true,
"engines": {
"node": ">= 0.6"
}
@@ -4506,7 +4555,6 @@
"version": "2.1.35",
"resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz",
"integrity": "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==",
"optional": true,
"dependencies": {
"mime-db": "1.52.0"
},
@@ -4538,8 +4586,7 @@
"node_modules/ms": {
"version": "2.1.3",
"resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz",
"integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==",
"optional": true
"integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA=="
},
"node_modules/natural-compare": {
"version": "1.4.0",
@@ -4567,7 +4614,6 @@
"url": "https://paypal.me/jimmywarting"
}
],
"optional": true,
"engines": {
"node": ">=10.5.0"
}
@@ -4576,7 +4622,6 @@
"version": "2.7.0",
"resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.7.0.tgz",
"integrity": "sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A==",
"optional": true,
"dependencies": {
"whatwg-url": "^5.0.0"
},
@@ -4623,10 +4668,9 @@
}
},
"node_modules/openai": {
"version": "4.28.4",
"resolved": "https://registry.npmjs.org/openai/-/openai-4.28.4.tgz",
"integrity": "sha512-RNIwx4MT/F0zyizGcwS+bXKLzJ8QE9IOyigDG/ttnwB220d58bYjYFp0qjvGwEFBO6+pvFVIDABZPGDl46RFsg==",
"optional": true,
"version": "4.29.2",
"resolved": "https://registry.npmjs.org/openai/-/openai-4.29.2.tgz",
"integrity": "sha512-cPkT6zjEcE4qU5OW/SoDDuXEsdOLrXlAORhzmaguj5xZSPlgKvLhi27sFWhLKj07Y6WKNWxcwIbzm512FzTBNQ==",
"dependencies": {
"@types/node": "^18.11.18",
"@types/node-fetch": "^2.6.4",
@@ -4643,10 +4687,9 @@
}
},
"node_modules/openai/node_modules/@types/node": {
"version": "18.19.20",
"resolved": "https://registry.npmjs.org/@types/node/-/node-18.19.20.tgz",
"integrity": "sha512-SKXZvI375jkpvAj8o+5U2518XQv76mAsixqfXiVyWyXZbVWQK25RurFovYpVIxVzul0rZoH58V/3SkEnm7s3qA==",
"optional": true,
"version": "18.19.26",
"resolved": "https://registry.npmjs.org/@types/node/-/node-18.19.26.tgz",
"integrity": "sha512-+wiMJsIwLOYCvUqSdKTrfkS8mpTp+MPINe6+Np4TAGFWWRWiBQ5kSq9nZGCSPkzx9mvT+uEukzpX4MOSCydcvw==",
"dependencies": {
"undici-types": "~5.26.4"
}
@@ -4996,6 +5039,18 @@
"integrity": "sha512-xWGDIW6x921xtzPkhiULtthJHoJvBbF3q26fzloPCK0hsvxtPVelvftw3zjbHWSkR2km9Z+4uxbDDK/6Zw9B8w==",
"dev": true
},
"node_modules/rechoir": {
"version": "0.6.2",
"resolved": "https://registry.npmjs.org/rechoir/-/rechoir-0.6.2.tgz",
"integrity": "sha512-HFM8rkZ+i3zrV+4LQjwQ0W+ez98pApMGM3HUrN04j3CqzPOzl9nmP15Y8YXNm8QHGv/eacOVEjqhmWpkRV0NAw==",
"dev": true,
"dependencies": {
"resolve": "^1.1.6"
},
"engines": {
"node": ">= 0.10"
}
},
"node_modules/repeat-string": {
"version": "1.6.1",
"resolved": "https://registry.npmjs.org/repeat-string/-/repeat-string-1.6.1.tgz",
@@ -5145,6 +5200,23 @@
"node": ">=8"
}
},
"node_modules/shelljs": {
"version": "0.8.5",
"resolved": "https://registry.npmjs.org/shelljs/-/shelljs-0.8.5.tgz",
"integrity": "sha512-TiwcRcrkhHvbrZbnRcFYMLl30Dfov3HKqzp5tO5b4pt6G/SezKcYhmDg15zXVBswHmctSAQKznqNW2LO5tTDow==",
"dev": true,
"dependencies": {
"glob": "^7.0.0",
"interpret": "^1.0.0",
"rechoir": "^0.6.2"
},
"bin": {
"shjs": "bin/shjs"
},
"engines": {
"node": ">=4"
}
},
"node_modules/shiki": {
"version": "0.14.7",
"resolved": "https://registry.npmjs.org/shiki/-/shiki-0.14.7.tgz",
@@ -5157,6 +5229,22 @@
"vscode-textmate": "^8.0.0"
}
},
"node_modules/shx": {
"version": "0.3.4",
"resolved": "https://registry.npmjs.org/shx/-/shx-0.3.4.tgz",
"integrity": "sha512-N6A9MLVqjxZYcVn8hLmtneQWIJtp8IKzMP4eMnx+nqkvXoqinUPCbUFLp2UcWTEIUONhlk0ewxr/jaVGlc+J+g==",
"dev": true,
"dependencies": {
"minimist": "^1.2.3",
"shelljs": "^0.8.5"
},
"bin": {
"shx": "lib/cli.js"
},
"engines": {
"node": ">=6"
}
},
"node_modules/signal-exit": {
"version": "3.0.7",
"resolved": "https://registry.npmjs.org/signal-exit/-/signal-exit-3.0.7.tgz",
@@ -5432,8 +5520,7 @@
"node_modules/tr46": {
"version": "0.0.3",
"resolved": "https://registry.npmjs.org/tr46/-/tr46-0.0.3.tgz",
"integrity": "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==",
"optional": true
"integrity": "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw=="
},
"node_modules/ts-api-utils": {
"version": "1.0.3",
@@ -5929,7 +6016,6 @@
"version": "3.3.3",
"resolved": "https://registry.npmjs.org/web-streams-polyfill/-/web-streams-polyfill-3.3.3.tgz",
"integrity": "sha512-d2JWLCivmZYTSIoge9MsgFCZrt571BikcWGYkjC1khllbTeDlGqZ2D8vD8E/lJa8WGWbb7Plm8/XJYV7IJHZZw==",
"optional": true,
"engines": {
"node": ">= 8"
}
@@ -5937,14 +6023,12 @@
"node_modules/webidl-conversions": {
"version": "3.0.1",
"resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-3.0.1.tgz",
"integrity": "sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==",
"optional": true
"integrity": "sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ=="
},
"node_modules/whatwg-url": {
"version": "5.0.0",
"resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-5.0.0.tgz",
"integrity": "sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw==",
"optional": true,
"dependencies": {
"tr46": "~0.0.3",
"webidl-conversions": "^3.0.0"

View File

@@ -1,17 +1,18 @@
{
"name": "lancedb",
"name": "@lancedb/lancedb",
"version": "0.4.3",
"main": "./dist/index.js",
"types": "./dist/index.d.ts",
"napi": {
"name": "lancedb-nodejs",
"name": "lancedb",
"triples": {
"defaults": false,
"additional": [
"aarch64-apple-darwin",
"aarch64-unknown-linux-gnu",
"x86_64-apple-darwin",
"x86_64-unknown-linux-gnu"
"x86_64-unknown-linux-gnu",
"x86_64-pc-windows-msvc"
]
}
},
@@ -28,6 +29,7 @@
"eslint-plugin-jsdoc": "^48.2.1",
"jest": "^29.7.0",
"prettier": "^3.1.0",
"shx": "^0.3.4",
"tmp": "^0.2.3",
"ts-jest": "^29.1.2",
"typedoc": "^0.25.7",
@@ -48,13 +50,14 @@
"os": [
"darwin",
"linux",
"windows"
"win32"
],
"scripts": {
"artifacts": "napi artifacts",
"build:native": "napi build --platform --release --js lancedb/native.js --dts lancedb/native.d.ts dist/",
"build:debug": "napi build --platform --dts ../lancedb/native.d.ts --js ../lancedb/native.js dist/",
"build": "npm run build:debug && tsc -b",
"build:release": "napi build --platform --release --dts ../lancedb/native.d.ts --js ../lancedb/native.js dist/",
"build": "npm run build:debug && tsc -b && shx cp lancedb/native.d.ts dist/native.d.ts",
"build-release": "npm run build:release && tsc -b && shx cp lancedb/native.d.ts dist/native.d.ts",
"chkformat": "prettier . --check",
"docs": "typedoc --plugin typedoc-plugin-markdown lancedb/index.ts",
"lint": "eslint lancedb && eslint __test__",
@@ -64,13 +67,14 @@
"version": "napi version"
},
"optionalDependencies": {
"lancedb-darwin-arm64": "0.4.3",
"lancedb-darwin-x64": "0.4.3",
"lancedb-linux-arm64-gnu": "0.4.3",
"lancedb-linux-x64-gnu": "0.4.3",
"openai": "^4.28.4"
"@lancedb/lancedb-darwin-arm64": "0.4.3",
"@lancedb/lancedb-darwin-x64": "0.4.3",
"@lancedb/lancedb-linux-arm64-gnu": "0.4.3",
"@lancedb/lancedb-linux-x64-gnu": "0.4.3",
"@lancedb/lancedb-win32-x64-msvc": "0.4.3"
},
"peerDependencies": {
"dependencies": {
"openai": "^4.29.2",
"apache-arrow": "^15.0.0"
}
}

View File

@@ -145,34 +145,20 @@ async def connect_async(
the last check, then the table will be checked for updates. Note: this
consistency only applies to read operations. Write operations are
always consistent.
request_thread_pool: int or ThreadPoolExecutor, optional
The thread pool to use for making batch requests to the LanceDB Cloud API.
If an integer, then a ThreadPoolExecutor will be created with that
number of threads. If None, then a ThreadPoolExecutor will be created
with the default number of threads. If a ThreadPoolExecutor, then that
executor will be used for making requests. This is for LanceDB Cloud
only and is only used when making batch requests (i.e., passing in
multiple queries to the search method at once).
Examples
--------
For a local directory, provide a path for the database:
>>> import lancedb
>>> db = lancedb.connect("~/.lancedb")
For object storage, use a URI prefix:
>>> db = lancedb.connect("s3://my-bucket/lancedb")
Connect to LanceDB Cloud:
>>> db = lancedb.connect("db://my_database", api_key="ldb_...")
>>> async def doctest_example():
... # For a local directory, provide a path to the database
... db = await lancedb.connect_async("~/.lancedb")
... # For object storage, use a URI prefix
... db = await lancedb.connect_async("s3://my-bucket/lancedb")
Returns
-------
conn : DBConnection
conn : AsyncConnection
A connection to a LanceDB database.
"""
if read_consistency_interval is not None:

View File

@@ -25,13 +25,18 @@ from overrides import EnforceOverrides, override
from pyarrow import fs
from lancedb.common import data_to_reader, validate_schema
from lancedb.embeddings.registry import EmbeddingFunctionRegistry
from lancedb.utils.events import register_event
from ._lancedb import connect as lancedb_connect
from .pydantic import LanceModel
from .table import AsyncTable, LanceTable, Table, _sanitize_data
from .util import fs_from_uri, get_uri_location, get_uri_scheme, join_uri
from .util import (
fs_from_uri,
get_uri_location,
get_uri_scheme,
join_uri,
validate_table_name,
)
if TYPE_CHECKING:
from datetime import timedelta
@@ -387,6 +392,7 @@ class LanceDBConnection(DBConnection):
"""
if mode.lower() not in ["create", "overwrite"]:
raise ValueError("mode must be either 'create' or 'overwrite'")
validate_table_name(name)
tbl = LanceTable.create(
self,
@@ -444,16 +450,17 @@ class LanceDBConnection(DBConnection):
class AsyncConnection(object):
"""An active LanceDB connection
To obtain a connection you can use the [connect] function.
To obtain a connection you can use the [connect_async][lancedb.connect_async]
function.
This could be a native connection (using lance) or a remote connection (e.g. for
connecting to LanceDB Cloud).
Local connections do not currently hold any open resources, but they may do so in the
future (for example, for a shared cache or connections to catalog services). Remote
connections represent an open connection to the remote server. The [close] method
can be used to release any underlying resources eagerly. The connection can also
be used as a context manager:
connections represent an open connection to the remote server. The
[close][lancedb.db.AsyncConnection.close] method can be used to release any
underlying resources eagerly. The connection can also be used as a context manager.
Connections can be shared across multiple threads and are expected to be long-lived.
Connections can also be used as a context manager; however, in many cases a single
@@ -464,10 +471,9 @@ class AsyncConnection(object):
Examples
--------
>>> import asyncio
>>> import lancedb
>>> async def my_connect():
... with await lancedb.connect("/tmp/my_dataset") as conn:
>>> async def doctest_example():
... with await lancedb.connect_async("/tmp/my_dataset") as conn:
... # do something with the connection
... pass
... # conn is closed here
@@ -528,9 +534,8 @@ class AsyncConnection(object):
exist_ok: Optional[bool] = None,
on_bad_vectors: Optional[str] = None,
fill_value: Optional[float] = None,
embedding_functions: Optional[List[EmbeddingFunctionConfig]] = None,
) -> AsyncTable:
"""Create a [Table][lancedb.table.Table] in the database.
"""Create an [AsyncTable][lancedb.table.AsyncTable] in the database.
Parameters
----------
@@ -569,7 +574,7 @@ class AsyncConnection(object):
Returns
-------
LanceTable
AsyncTable
A reference to the newly created table.
!!! note
@@ -583,12 +588,14 @@ class AsyncConnection(object):
Can create with list of tuples or dictionaries:
>>> import lancedb
>>> db = lancedb.connect("./.lancedb")
>>> data = [{"vector": [1.1, 1.2], "lat": 45.5, "long": -122.7},
... {"vector": [0.2, 1.8], "lat": 40.1, "long": -74.1}]
>>> db.create_table("my_table", data)
LanceTable(connection=..., name="my_table")
>>> db["my_table"].head()
>>> async def doctest_example():
... db = await lancedb.connect_async("./.lancedb")
... data = [{"vector": [1.1, 1.2], "lat": 45.5, "long": -122.7},
... {"vector": [0.2, 1.8], "lat": 40.1, "long": -74.1}]
... my_table = await db.create_table("my_table", data)
... print(await my_table.query().limit(5).to_arrow())
>>> import asyncio
>>> asyncio.run(doctest_example())
pyarrow.Table
vector: fixed_size_list<item: float>[2]
child 0, item: float
@@ -607,9 +614,11 @@ class AsyncConnection(object):
... "lat": [45.5, 40.1],
... "long": [-122.7, -74.1]
... })
>>> db.create_table("table2", data)
LanceTable(connection=..., name="table2")
>>> db["table2"].head()
>>> async def pandas_example():
... db = await lancedb.connect_async("./.lancedb")
... my_table = await db.create_table("table2", data)
... print(await my_table.query().limit(5).to_arrow())
>>> asyncio.run(pandas_example())
pyarrow.Table
vector: fixed_size_list<item: float>[2]
child 0, item: float
@@ -629,9 +638,11 @@ class AsyncConnection(object):
... pa.field("lat", pa.float32()),
... pa.field("long", pa.float32())
... ])
>>> db.create_table("table3", data, schema = custom_schema)
LanceTable(connection=..., name="table3")
>>> db["table3"].head()
>>> async def with_schema():
... db = await lancedb.connect_async("./.lancedb")
... my_table = await db.create_table("table3", data, schema = custom_schema)
... print(await my_table.query().limit(5).to_arrow())
>>> asyncio.run(with_schema())
pyarrow.Table
vector: fixed_size_list<item: float>[2]
child 0, item: float
@@ -663,9 +674,10 @@ class AsyncConnection(object):
... pa.field("item", pa.utf8()),
... pa.field("price", pa.float32()),
... ])
>>> db.create_table("table4", make_batches(), schema=schema)
LanceTable(connection=..., name="table4")
>>> async def iterable_example():
... db = await lancedb.connect_async("./.lancedb")
... await db.create_table("table4", make_batches(), schema=schema)
>>> asyncio.run(iterable_example())
"""
if inspect.isclass(schema) and issubclass(schema, LanceModel):
# convert LanceModel to pyarrow schema
@@ -674,12 +686,6 @@ class AsyncConnection(object):
schema = schema.to_arrow_schema()
metadata = None
if embedding_functions is not None:
# If we passed in embedding functions explicitly
# then we'll override any schema metadata that
# may was implicitly specified by the LanceModel schema
registry = EmbeddingFunctionRegistry.get_instance()
metadata = registry.get_table_metadata(embedding_functions)
# Defining defaults here and not in function prototype. In the future
# these defaults will move into rust so better to keep them as None.
@@ -760,11 +766,11 @@ class AsyncConnection(object):
name: str
The name of the table.
"""
raise NotImplementedError
await self._inner.drop_table(name)
async def drop_database(self):
"""
Drop database
This is the same thing as dropping all the tables
"""
raise NotImplementedError
await self._inner.drop_db()

View File

@@ -10,13 +10,18 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
from abc import ABC, abstractmethod
from typing import List, Union
import numpy as np
import pyarrow as pa
from pydantic import BaseModel, Field, PrivateAttr
from tqdm import tqdm
import lancedb
from .fine_tuner import QADataset
from .utils import TEXT, retry_with_exponential_backoff
@@ -126,6 +131,22 @@ class EmbeddingFunction(BaseModel, ABC):
def __hash__(self) -> int:
return hash(frozenset(vars(self).items()))
def finetune(self, dataset: QADataset, *args, **kwargs):
"""
Finetune the embedding function on a dataset
"""
raise NotImplementedError(
"Finetuning is not supported for this embedding function"
)
def evaluate(self, dataset: QADataset, top_k=5, path=None, *args, **kwargs):
"""
Evaluate the embedding function on a dataset
"""
raise NotImplementedError(
"Evaluation is not supported for this embedding function"
)
class EmbeddingFunctionConfig(BaseModel):
"""
@@ -159,3 +180,52 @@ class TextEmbeddingFunction(EmbeddingFunction):
Generate the embeddings for the given texts
"""
pass
def evaluate(self, dataset: QADataset, top_k=5, path=None, *args, **kwargs):
"""
Evaluate the embedding function on a dataset. This calculates the hit-rate for
the top-k retrieved documents for each query in the dataset. Assumes that the
first relevant document is the expected document.
Pro - Should work for any embedding model.
Con - Returns only a very simple metric.
Parameters
----------
dataset: QADataset
The dataset to evaluate on
top_k: int, default=5
The number of documents to retrieve for each query
path: str, optional
Directory for the temporary evaluation table; defaults to "./eval"
Returns
-------
list of dict
The per-query evaluation results
"""
corpus = dataset.corpus
queries = dataset.queries
relevant_docs = dataset.relevant_docs
path = path or os.path.join(os.getcwd(), "eval")
db = lancedb.connect(path)
class Schema(lancedb.pydantic.LanceModel):
id: str
text: str = self.SourceField()
vector: lancedb.pydantic.Vector(self.ndims()) = self.VectorField()
retriever = db.create_table("eval", schema=Schema, mode="overwrite")
pylist = [{"id": str(k), "text": v} for k, v in corpus.items()]
retriever.add(pylist)
eval_results = []
for query_id, query in tqdm(queries.items()):
retrieved_nodes = retriever.search(query).limit(top_k).to_list()
retrieved_ids = [node["id"] for node in retrieved_nodes]
expected_id = relevant_docs[query_id][0]
is_hit = expected_id in retrieved_ids # assume 1 relevant doc
eval_result = {
"is_hit": is_hit,
"retrieved": retrieved_ids,
"expected": expected_id,
"query": query_id,
}
eval_results.append(eval_result)
return eval_results

View File

@@ -0,0 +1,133 @@
The fine-tuning workflow for embeddings consists of the following parts:
### QADataset
This class is used for managing the data for fine-tuning. It contains the following builder methods:
```
- from_llm(
nodes: 'List[TextChunk]' ,
llm: BaseLLM,
qa_generate_prompt_tmpl: str = DEFAULT_PROMPT_TMPL,
num_questions_per_chunk: int = 2,
) -> "QADataset"
```
Create synthetic data from a language model and text chunks of the original document on which the model is to be fine-tuned.
```python
from_responses(docs: List['TextChunk'], queries: Dict[str, str], relevant_docs: Dict[str, List[str]]) -> "QADataset"
```
Create a dataset from queries and responses gathered in a real-world scenario. Designed to be used for knowledge distillation from a larger LLM to a smaller one.
It also contains the following data attributes:
```
queries (Dict[str, str]): Dict id -> query.
corpus (Dict[str, str]): Dict id -> string.
relevant_docs (Dict[str, List[str]]): Dict query id -> list of doc ids.
```
### TextChunk
This class represents a unit of text for fine-tuning. It is designed to standardize working with various text splitting/pre-processing tools such as llama-index and langchain. It contains the following attributes:
```
text: str
id: str
metadata: Dict[str, Any] = {}
```
Builder Methods:
```python
from_llama_index_node(node) -> "TextChunk"
```
Create a text chunk from a llama index node.
```python
from_langchain_node(node) -> "TextChunk"
```
Create a text chunk from a langchain node (currently not implemented).
```python
from_chunk(cls, chunk: str, metadata: dict = {}) -> "TextChunk"
```
Create a text chunk from a string.
### FineTuner
This class is used for fine-tuning embeddings. It is exposed to the user via a high-level method on the base embedding API.
```python
class BaseEmbeddingTuner(ABC):
"""Base Embedding finetuning engine."""
@abstractmethod
def finetune(self) -> None:
"""Goes off and does stuff."""
def helper(self) -> None:
"""A helper method."""
pass
```
### Embedding API finetuning implementation
Each embedding API needs to implement the `finetune` method in order to support fine-tuning. A vanilla evaluation technique that calculates hit-rate @ `top_k` is implemented in the base text embedding class; a sketch of consuming its results is shown below.
### Fine-tuning workflow
The fine-tuning workflow is as follows:
1. Create a `QADataset` object.
2. Initialize any embedding function using the LanceDB embedding API.
3. Call the `finetune` method on the embedding object with the `QADataset` object as an argument.
4. Evaluate the fine-tuned model using the `evaluate` method in the embedding API.
# End-to-End Examples
The following is an example of how to fine-tune an embedding model using the LanceDB embedding API.
## Example 1: Fine-tuning from a synthetic dataset
```python
import os

import pandas as pd
from lancedb.embeddings.fine_tuner.llm import Openai
from lancedb.embeddings.fine_tuner.dataset import QADataset, TextChunk
from lancedb.pydantic import LanceModel, Vector
from llama_index.core import SimpleDirectoryReader
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.schema import MetadataMode
from lancedb.embeddings import get_registry
# 1. Create a QADataset object
url = "uber10k.pdf"
reader = SimpleDirectoryReader(input_files=url)
docs = reader.load_data()
parser = SentenceSplitter()
nodes = parser.get_nodes_from_documents(docs)
if os.path.exists(name):
ds = QADataset.load(name)
else:
llm = Openai()
# convert Llama-index TextNode to TextChunk
chunks = [TextChunk.from_llama_index_node(node) for node in nodes]
ds = QADataset.from_llm(chunks, llm)
ds.save(name)
# 2. Initialize the embedding model
model = get_registry().get("sentence-transformers").create()
# 3. Fine-tune the model
model.finetune(trainset=ds, path="model_finetuned", epochs=4)
# 4. Evaluate the fine-tuned model
base = get_registry().get("sentence-transformers").create()
tuned = get_registry().get("sentence-transformers").create(name="./model_finetuned")
openai = get_registry().get("openai").create(name="text-embedding-3-large")
rs1 = base.evaluate(ds, path="val_res")
rs2 = tuned.evaluate(ds, path="val_res")
rs3 = openai.evaluate(ds)
print("openai-embedding-v3 hit-rate - ", pd.DataFrame(rs3)["is_hit"].mean())
print("fine-tuned hit-rate - ", pd.DataFrame(rs2)["is_hit"].mean())
print("Base model hite-rate - ", pd.DataFrame(rs1)["is_hit"].mean())
```

View File

@@ -0,0 +1,4 @@
from .dataset import QADataset, TextChunk
from .llm import Gemini, Openai
__all__ = ["QADataset", "TextChunk", "Openai", "Gemini"]

View File

@@ -0,0 +1,13 @@
from abc import ABC, abstractmethod
class BaseEmbeddingTuner(ABC):
"""Base Embedding finetuning engine."""
@abstractmethod
def finetune(self) -> None:
"""Goes off and does stuff."""
def helper(self) -> None:
"""A helper method."""
pass

View File

@@ -0,0 +1,205 @@
import re
import uuid
from pathlib import Path
from typing import Any, Dict, List, Tuple, Optional
import lance
import pyarrow as pa
from pydantic import BaseModel
from tqdm import tqdm
from lancedb.utils.general import LOGGER
from .llm import BaseLLM
DEFAULT_PROMPT_TMPL = """\
Context information is below.
---------------------
{context_str}
---------------------
Given the context information and no prior knowledge, \
generate only questions based on the query below.
You are a Teacher/Professor. Your task is to set up \
{num_questions_per_chunk} questions for an upcoming \
quiz/examination. The questions should be diverse in nature \
across the document. Restrict the questions to the \
context information provided.
"""
class QADataset(BaseModel):
"""Embedding QA Finetuning Dataset.
Args:
queries (Dict[str, str]): Dict id -> query.
corpus (Dict[str, str]): Dict id -> string.
relevant_docs (Dict[str, List[str]]): Dict query id -> list of doc ids.
"""
path: Optional[str] = None
queries: Dict[str, str] # id -> query
corpus: Dict[str, str] # id -> text
relevant_docs: Dict[str, List[str]] # query id -> list of retrieved doc ids
mode: str = "text"
@property
def query_docid_pairs(self) -> List[Tuple[str, List[str]]]:
"""Get query, relevant doc ids."""
return [
(query, self.relevant_docs[query_id])
for query_id, query in self.queries.items()
]
def save(self, path: str, mode: str = "overwrite") -> None:
"""Save to lance dataset"""
self.path = path
save_dir = Path(path)
save_dir.mkdir(parents=True, exist_ok=True)
# convert to pydict {"id": []}
queries = {
"id": list(self.queries.keys()),
"query": list(self.queries.values()),
}
corpus = {
"id": list(self.corpus.keys()),
"text": [
val or " " for val in self.corpus.values()
], # lance saves empty strings as null
}
relevant_docs = {
"query_id": list(self.relevant_docs.keys()),
"doc_id": list(self.relevant_docs.values()),
}
# write to lance
lance.write_dataset(
pa.Table.from_pydict(queries), save_dir / "queries.lance", mode=mode
)
lance.write_dataset(
pa.Table.from_pydict(corpus), save_dir / "corpus.lance", mode=mode
)
lance.write_dataset(
pa.Table.from_pydict(relevant_docs),
save_dir / "relevant_docs.lance",
mode=mode,
)
@classmethod
def load(cls, path: str, version: Optional[int] = None) -> "QADataset":
"""Load from .lance data"""
load_dir = Path(path)
queries = lance.dataset(load_dir / "queries.lance", version=version).to_table().to_pydict()
corpus = lance.dataset(load_dir / "corpus.lance", version=version).to_table().to_pydict()
relevant_docs = (
lance.dataset(load_dir / "relevant_docs.lance", version=version).to_table().to_pydict()
)
return cls(
path=str(path),
queries=dict(zip(queries["id"], queries["query"])),
corpus=dict(zip(corpus["id"], corpus["text"])),
relevant_docs=dict(zip(relevant_docs["query_id"], relevant_docs["doc_id"])),
)
def switch_version(self, version: int) -> "QADataset":
"""Load another version of this dataset from its saved path."""
if not self.path:
raise ValueError("Path not set. You need to call save() first.")
return self.load(self.path, version=version)
# generate queries as a convenience function
@classmethod
def from_llm(
cls,
nodes: "List[TextChunk]",
llm: BaseLLM,
qa_generate_prompt_tmpl: str = DEFAULT_PROMPT_TMPL,
num_questions_per_chunk: int = 2,
) -> "QADataset":
"""Generate examples given a set of nodes."""
node_dict = {node.id: node.text for node in nodes}
queries = {}
relevant_docs = {}
for node_id, text in tqdm(node_dict.items()):
query = qa_generate_prompt_tmpl.format(
context_str=text, num_questions_per_chunk=num_questions_per_chunk
)
response = llm.chat_completion(query)
result = str(response).strip().split("\n")
questions = [
re.sub(r"^\d+[\).\s]", "", question).strip() for question in result
]
questions = [question for question in questions if len(question) > 0]
for question in questions:
question_id = str(uuid.uuid4())
queries[question_id] = question
relevant_docs[question_id] = [node_id]
return QADataset(queries=queries, corpus=node_dict, relevant_docs=relevant_docs)
@classmethod
def from_responses(
cls,
docs: List["TextChunk"],
queries: Dict[str, str],
relevant_docs: Dict[str, List[str]],
) -> "QADataset":
"""Create a QADataset from a list of TextChunks and a list of questions."""
node_dict = {node.id: node.text for node in docs}
return cls(queries=queries, corpus=node_dict, relevant_docs=relevant_docs)
def versions(self) -> List[int]:
"""Get the versions of the dataset."""
# TODO: tidy this up
data_paths = self._get_data_file_paths()
return lance.dataset(data_paths[0]).versions()
def _get_data_file_paths(self) -> Tuple[Path, Path, Path]:
"""Get the paths of the underlying .lance datasets."""
base = Path(self.path)
queries = base / "queries.lance"
corpus = base / "corpus.lance"
relevant_docs = base / "relevant_docs.lance"
return queries, corpus, relevant_docs
class TextChunk(BaseModel):
"""Simple text chunk for generating questions."""
text: str
id: str
metadata: Dict[str, Any] = {}
@classmethod
def from_chunk(cls, chunk: str, metadata: dict = {}) -> "TextChunk":
"""Create a SimpleTextChunk from a chunk."""
# generate a unique id
return cls(text=chunk, id=str(uuid.uuid4()), metadata=metadata)
@classmethod
def from_llama_index_node(cls, node):
"""Convert a llama index node to a text chunk."""
return cls(text=node.text, id=node.node_id, metadata=node.metadata)
@classmethod
def from_langchain_node(cls, node):
"""Convert a langchaain node to a text chunk."""
raise NotImplementedError("Not implemented yet.")
def to_dict(self) -> Dict[str, Any]:
"""Convert to a dictionary."""
return self.dict()
def __str__(self) -> str:
return self.text
def __repr__(self) -> str:
return f"SimpleTextChunk(text={self.text}, id={self.id}, \
metadata={self.metadata})"

View File

@@ -0,0 +1,85 @@
import os
import re
from functools import cached_property
from typing import Optional
from pydantic import BaseModel
from ...util import attempt_import_or_raise
from ..utils import api_key_not_found_help
class BaseLLM(BaseModel):
"""
TODO:
Base class for language-model helpers used by the fine-tuning API. This class
is loosely designed for now and will be updated as usage becomes clearer.
"""
model_name: str
model_kwargs: dict = {}
@cached_property
def _client(self):
"""
Get the client for the language model
"""
raise NotImplementedError
def chat_completion(self, prompt: str, **kwargs):
"""
Get the chat completion for the given prompt
"""
raise NotImplementedError
class Openai(BaseLLM):
model_name: str = "gpt-3.5-turbo"
kwargs: dict = {}
api_key: Optional[str] = None
@cached_property
def _client(self):
"""
Get the client for the language model
"""
openai = attempt_import_or_raise("openai")
if not os.environ.get("OPENAI_API_KEY"):
api_key_not_found_help("openai")
return openai.OpenAI()
def chat_completion(self, prompt: str) -> str:
"""
Get the chat completion for the given prompt
"""
# TODO: this is the legacy OpenAI API; replace with completions
completion = self._client.chat.completions.create(
model=self.model_name,
messages=[
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt},
],
**self.kwargs,
)
text = completion.choices[0].message.content
return text
def get_questions(self, prompt: str) -> list:
"""
Generate a cleaned-up list of questions for the given prompt
"""
response = self.chat_completion(prompt)
result = str(response).strip().split("\n")
questions = [
re.sub(r"^\d+[\).\s]", "", question).strip() for question in result
]
questions = [question for question in questions if len(question) > 0]
return questions
class Gemini(BaseLLM):
pass

View File

@@ -103,9 +103,9 @@ class InstructorEmbeddingFunction(TextEmbeddingFunction):
# convert_to_numpy: bool = True # Hardcoding this as numpy can be ingested directly
source_instruction: str = "represent the document for retrieval"
query_instruction: str = (
"represent the document for retrieving the most similar documents"
)
query_instruction: (
str
) = "represent the document for retrieving the most similar documents"
@weak_lru(maxsize=1)
def ndims(self):

View File

@@ -10,12 +10,16 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import List, Union
from typing import Any, List, Optional, Union
import numpy as np
from lancedb.embeddings.fine_tuner import QADataset
from lancedb.utils.general import LOGGER
from ..util import attempt_import_or_raise
from .base import TextEmbeddingFunction
from .fine_tuner.basetuner import BaseEmbeddingTuner
from .registry import register
from .utils import weak_lru
@@ -80,3 +84,151 @@ class SentenceTransformerEmbeddings(TextEmbeddingFunction):
"sentence_transformers", "sentence-transformers"
)
return sentence_transformers.SentenceTransformer(self.name, device=self.device)
def finetune(self, trainset: QADataset, *args, **kwargs):
"""
Finetune the Sentence Transformers model
Parameters
----------
trainset: QADataset
The dataset to use for finetuning.
"""
tuner = SentenceTransformersTuner(
model=self.embedding_model,
trainset=trainset,
**kwargs,
)
tuner.finetune()
class SentenceTransformersTuner(BaseEmbeddingTuner):
"""Sentence Transformers Embedding Finetuning Engine."""
def __init__(
self,
model: Any,
trainset: QADataset,
valset: Optional[QADataset] = None,
path: Optional[str] = "~/.lancedb/embeddings/models",
batch_size: int = 8,
epochs: int = 1,
show_progress: bool = True,
eval_steps: int = 50,
max_input_per_doc: int = -1,
loss: Optional[Any] = None,
evaluator: Optional[Any] = None,
run_name: Optional[str] = None,
log_wandb: bool = False,
) -> None:
"""
Parameters
----------
model: Any
The sentence-transformers model instance to finetune.
trainset: QADataset
The training dataset.
valset: Optional[QADataset]
The validation dataset.
path: Optional[str]
The path to save the model.
batch_size: int, default=8
The batch size.
epochs: int, default=1
The number of epochs.
show_progress: bool, default=True
Whether to show progress.
eval_steps: int, default=50
The number of steps to evaluate.
max_input_per_doc: int, default=-1
The maximum number of relevant docs to use per query.
If -1, use all relevant docs.
"""
from sentence_transformers import InputExample, losses
from sentence_transformers.evaluation import InformationRetrievalEvaluator
from torch.utils.data import DataLoader
self.model = model
self.trainset = trainset
self.valset = valset
self.path = path
self.batch_size = batch_size
self.epochs = epochs
self.show_progress = show_progress
self.eval_steps = eval_steps
self.max_input_per_doc = max_input_per_doc
self.evaluator = None
self.run_name = run_name
self.log_wandb = log_wandb
if self.max_input_per_doc == 0 or self.max_input_per_doc < -1:
raise ValueError("max_input_per_doc must be -1 or greater than 0.")
examples: Any = []
for query_id, query in self.trainset.queries.items():
if max_input_per_doc == -1:
for node_id in self.trainset.relevant_docs[query_id]:
text = self.trainset.corpus[node_id]
example = InputExample(texts=[query, text])
examples.append(example)
else:
# use at most `max_input_per_doc` relevant docs for this query
for node_id in self.trainset.relevant_docs[query_id][:max_input_per_doc]:
text = self.trainset.corpus[node_id]
example = InputExample(texts=[query, text])
examples.append(example)
self.examples = examples
self.loader: DataLoader = DataLoader(examples, batch_size=batch_size)
if self.valset is not None:
eval_engine = evaluator or InformationRetrievalEvaluator
self.evaluator = eval_engine(
valset.queries, valset.corpus, valset.relevant_docs
)
# define loss
self.loss = loss or losses.MultipleNegativesRankingLoss(self.model)
self.warmup_steps = int(len(self.loader) * epochs * 0.1)
def finetune(self) -> None:
"""Finetune the Sentence Transformers model."""
self.model.fit(
train_objectives=[(self.loader, self.loss)],
epochs=self.epochs,
warmup_steps=self.warmup_steps,
output_path=self.path,
show_progress_bar=self.show_progress,
evaluator=self.evaluator,
evaluation_steps=self.eval_steps,
callback=self._wandb_callback if self.log_wandb else None,
)
self.helper()
def helper(self) -> None:
"""A helper method."""
LOGGER.info("Finetuning complete.")
LOGGER.info(f"Model saved to {self.path}.")
LOGGER.info("You can now use the model as follows:")
LOGGER.info(
f"model = get_registry().get('sentence-transformers').create(name='./{self.path}')" # noqa
)
def _wandb_callback(self, score, epoch, steps):
try:
import wandb
except ImportError:
raise ImportError(
"wandb is not installed. Please install it using `pip install wandb`"
)
run = wandb.run or wandb.init(
project="sbert_lancedb_finetune", name=self.run_name
)
run.log({"epoch": epoch, "steps": steps, "score": score})

View File

@@ -1033,7 +1033,7 @@ class AsyncQueryBase(object):
Construct an AsyncQueryBase
This method is not intended to be called directly. Instead, use the
[Table.query][] method to create a query.
[AsyncTable.query][lancedb.table.AsyncTable.query] method to create a query.
"""
self._inner = inner
@@ -1041,7 +1041,10 @@ class AsyncQueryBase(object):
"""
Only return rows matching the given predicate
The predicate should be supplied as an SQL query string. For example:
The predicate should be supplied as an SQL query string.
Examples
--------
>>> predicate = "x > 10"
>>> predicate = "y > 0 AND y < 100"
@@ -1112,7 +1115,8 @@ class AsyncQueryBase(object):
Execute the query and collect the results into an Apache Arrow Table.
This method will collect all results into memory before returning. If
you expect a large number of results, you may want to use [to_batches][]
you expect a large number of results, you may want to use
[to_batches][lancedb.query.AsyncQueryBase.to_batches]
"""
batch_iter = await self.to_batches()
return pa.Table.from_batches(
@@ -1123,12 +1127,13 @@ class AsyncQueryBase(object):
"""
Execute the query and collect the results into a pandas DataFrame.
This method will collect all results into memory before returning. If
you expect a large number of results, you may want to use [to_batches][]
and convert each batch to pandas separately.
This method will collect all results into memory before returning. If you
expect a large number of results, you may want to use
[to_batches][lancedb.query.AsyncQueryBase.to_batches] and convert each batch to
pandas separately.
Example
-------
Examples
--------
>>> import asyncio
>>> from lancedb import connect_async
@@ -1148,7 +1153,7 @@ class AsyncQuery(AsyncQueryBase):
Construct an AsyncQuery
This method is not intended to be called directly. Instead, use the
[Table.query][] method to create a query.
[AsyncTable.query][lancedb.table.AsyncTable.query] method to create a query.
"""
super().__init__(inner)
self._inner = inner
@@ -1189,8 +1194,8 @@ class AsyncQuery(AsyncQueryBase):
If there is only one vector column (a column whose data type is a
fixed size list of floats) then the column does not need to be specified.
If there is more than one vector column you must use
[AsyncVectorQuery::column][] to specify which column you would like to
compare with.
[AsyncVectorQuery.column][lancedb.query.AsyncVectorQuery.column] to specify
which column you would like to compare with.
If no index has been created on the vector column then a vector query
will perform a distance comparison between the query vector and every
@@ -1221,8 +1226,10 @@ class AsyncVectorQuery(AsyncQueryBase):
Construct an AsyncVectorQuery
This method is not intended to be called directly. Instead, create
a query first with [Table.query][] and then use [AsyncQuery.nearest_to][]
to convert to a vector query.
a query first with [AsyncTable.query][lancedb.table.AsyncTable.query] and then
use [AsyncQuery.nearest_to][lancedb.query.AsyncQuery.nearest_to] to convert to
a vector query. Or you can use
[AsyncTable.vector_search][lancedb.table.AsyncTable.vector_search]
"""
super().__init__(inner)
self._inner = inner
@@ -1232,7 +1239,7 @@ class AsyncVectorQuery(AsyncQueryBase):
Set the vector column to query
This controls which column is compared to the query vector supplied in
the call to [Query.nearest_to][].
the call to [AsyncQuery.nearest_to][lancedb.query.AsyncQuery.nearest_to].
This parameter must be specified if the table has more than one column
whose data type is a fixed-size-list of floats.

View File

@@ -26,6 +26,7 @@ from ..db import DBConnection
from ..embeddings import EmbeddingFunctionConfig
from ..pydantic import LanceModel
from ..table import Table, _sanitize_data
from ..util import validate_table_name
from .arrow import to_ipc_binary
from .client import ARROW_STREAM_CONTENT_TYPE, RestfulLanceDBClient
from .errors import LanceDBClientError
@@ -223,6 +224,7 @@ class RemoteDBConnection(DBConnection):
LanceTable(table4)
"""
validate_table_name(name)
if data is None and schema is None:
raise ValueError("Either data or schema must be provided.")
if embedding_functions is not None:

View File

@@ -14,7 +14,7 @@ class CrossEncoderReranker(Reranker):
Parameters
----------
model : str, default "cross-encoder/ms-marco-TinyBERT-L-6"
model_name : str, default "cross-encoder/ms-marco-TinyBERT-L-6"
The name of the cross encoder model to use. See the sentence transformers
documentation for a list of available models.
column : str, default "text"

View File

@@ -1893,8 +1893,8 @@ class AsyncTable:
An AsyncTable object is expected to be long lived and reused for multiple
operations. AsyncTable objects will cache a certain amount of index data in memory.
This cache will be freed when the Table is garbage collected. To eagerly free the
cache you can call the [close][AsyncTable.close] method. Once the AsyncTable is
closed, it cannot be used for any further operations.
cache you can call the [close][lancedb.AsyncTable.close] method. Once the
AsyncTable is closed, it cannot be used for any further operations.
An AsyncTable can also be used as a context manager, and will automatically close
when the context is exited. Closing a table is optional. If you do not close the
@@ -1903,13 +1903,17 @@ class AsyncTable:
Examples
--------
Create using [DBConnection.create_table][lancedb.DBConnection.create_table]
Create using [AsyncConnection.create_table][lancedb.AsyncConnection.create_table]
(more examples in that method's documentation).
>>> import lancedb
>>> db = lancedb.connect("./.lancedb")
>>> table = db.create_table("my_table", data=[{"vector": [1.1, 1.2], "b": 2}])
>>> table.head()
>>> async def create_a_table():
... db = await lancedb.connect_async("./.lancedb")
... data = [{"vector": [1.1, 1.2], "b": 2}]
... table = await db.create_table("my_table", data=data)
... print(await table.query().limit(5).to_arrow())
>>> import asyncio
>>> asyncio.run(create_a_table())
pyarrow.Table
vector: fixed_size_list<item: float>[2]
child 0, item: float
@@ -1918,25 +1922,37 @@ class AsyncTable:
vector: [[[1.1,1.2]]]
b: [[2]]
Can append new data with [Table.add()][lancedb.table.Table.add].
Can append new data with [AsyncTable.add()][lancedb.table.AsyncTable.add].
>>> table.add([{"vector": [0.5, 1.3], "b": 4}])
>>> async def add_to_table():
... db = await lancedb.connect_async("./.lancedb")
... table = await db.open_table("my_table")
... await table.add([{"vector": [0.5, 1.3], "b": 4}])
>>> asyncio.run(add_to_table())
Can query the table with [Table.search][lancedb.table.Table.search].
Can query the table with
[AsyncTable.vector_search][lancedb.table.AsyncTable.vector_search].
>>> table.search([0.4, 0.4]).select(["b", "vector"]).to_pandas()
>>> async def search_table_for_vector():
... db = await lancedb.connect_async("./.lancedb")
... table = await db.open_table("my_table")
... results = (
... await table.vector_search([0.4, 0.4]).select(["b", "vector"]).to_pandas()
... )
... print(results)
>>> asyncio.run(search_table_for_vector())
b vector _distance
0 4 [0.5, 1.3] 0.82
1 2 [1.1, 1.2] 1.13
Search queries are much faster when an index is created. See
[Table.create_index][lancedb.table.Table.create_index].
[AsyncTable.create_index][lancedb.table.AsyncTable.create_index].
"""
def __init__(self, table: LanceDBTable):
"""Create a new Table object.
"""Create a new AsyncTable object.
You should not create Table objects directly.
You should not create AsyncTable objects directly.
Use [AsyncConnection.create_table][lancedb.AsyncConnection.create_table] and
[AsyncConnection.open_table][lancedb.AsyncConnection.open_table] to obtain
@@ -1988,6 +2004,14 @@ class AsyncTable:
return await self._inner.count_rows(filter)
def query(self) -> AsyncQuery:
"""
Returns an [AsyncQuery][lancedb.query.AsyncQuery] that can be used
to search the table.
Use methods on the returned query to control query behavior. The query
can be executed with methods like [to_arrow][lancedb.query.AsyncQuery.to_arrow],
[to_pandas][lancedb.query.AsyncQuery.to_pandas] and more.
"""
return AsyncQuery(self._inner.query())
async def to_pandas(self) -> "pd.DataFrame":
@@ -2024,20 +2048,8 @@ class AsyncTable:
Parameters
----------
index: Index
The index to create.
LanceDb supports multiple types of indices. See the static methods on
the Index class for more details.
column: str, default None
column: str
The column to index.
When building a scalar index this must be set.
When building a vector index, this is optional. The default will look
for any columns of type fixed-size-list with floating point values. If
there is only one column of this type then it will be used. Otherwise
an error will be returned.
replace: bool, default True
Whether to replace the existing index
@@ -2046,6 +2058,10 @@ class AsyncTable:
that index is out of date.
The default is True
config: Union[IvfPq, BTree], default None
For advanced configuration you can specify the type of index you would
like to create. You can also specify index-specific parameters when
creating an index object.
"""
index = None
if config is not None:
@@ -2167,7 +2183,8 @@ class AsyncTable:
Search the table with a given query vector.
This is a convenience method for preparing a vector query and
is the same thing as calling `nearest_to` on the builder returned
by `query`. Seer [nearest_to][AsyncQuery.nearest_to] for more details.
by `query`. See [nearest_to][lancedb.query.AsyncQuery.nearest_to] for more
details.
"""
return self.query().nearest_to(query_vector)
@@ -2233,7 +2250,7 @@ class AsyncTable:
x vector
0 3 [5.0, 6.0]
"""
raise NotImplementedError
return await self._inner.delete(where)
async def update(
self,
@@ -2289,102 +2306,6 @@ class AsyncTable:
return await self._inner.update(updates_sql, where)
async def cleanup_old_versions(
self,
older_than: Optional[timedelta] = None,
*,
delete_unverified: bool = False,
) -> CleanupStats:
"""
Clean up old versions of the table, freeing disk space.
Note: This function is not available in LanceDb Cloud (since LanceDb
Cloud manages cleanup for you automatically)
Parameters
----------
older_than: timedelta, default None
The minimum age of the version to delete. If None, then this defaults
to two weeks.
delete_unverified: bool, default False
Because they may be part of an in-progress transaction, files newer
than 7 days old are not deleted by default. If you are sure that
there are no in-progress transactions, then you can set this to True
to delete all files older than `older_than`.
Returns
-------
CleanupStats
The stats of the cleanup operation, including how many bytes were
freed.
"""
raise NotImplementedError
async def compact_files(self, *args, **kwargs):
"""
Run the compaction process on the table.
Note: This function is not available in LanceDb Cloud (since LanceDb
Cloud manages compaction for you automatically)
This can be run after making several small appends to optimize the table
for faster reads.
Arguments are passed onto :meth:`lance.dataset.DatasetOptimizer.compact_files`.
For most cases, the default should be fine.
"""
raise NotImplementedError
async def add_columns(self, transforms: Dict[str, str]):
"""
Add new columns with defined values.
This is not yet available in LanceDB Cloud.
Parameters
----------
transforms: Dict[str, str]
A map of column name to a SQL expression to use to calculate the
value of the new column. These expressions will be evaluated for
each row in the table, and can reference existing columns.
"""
raise NotImplementedError
async def alter_columns(self, alterations: Iterable[Dict[str, str]]):
"""
Alter column names and nullability.
This is not yet available in LanceDB Cloud.
alterations : Iterable[Dict[str, Any]]
A sequence of dictionaries, each with the following keys:
- "path": str
The column path to alter. For a top-level column, this is the name.
For a nested column, this is the dot-separated path, e.g. "a.b.c".
- "name": str, optional
The new name of the column. If not specified, the column name is
not changed.
- "nullable": bool, optional
Whether the column should be nullable. If not specified, the column
nullability is not changed. Only non-nullable columns can be changed
to nullable. Currently, you cannot change a nullable column to
non-nullable.
"""
raise NotImplementedError
async def drop_columns(self, columns: Iterable[str]):
"""
Drop columns from the table.
This is not yet available in LanceDB Cloud.
Parameters
----------
columns : Iterable[str]
The names of the columns to drop.
"""
raise NotImplementedError
async def version(self) -> int:
"""
Retrieve the version of the table

View File

@@ -25,6 +25,8 @@ import numpy as np
import pyarrow as pa
import pyarrow.fs as pa_fs
from ._lancedb import validate_table_name as native_validate_table_name
def safe_import_adlfs():
try:
@@ -286,3 +288,8 @@ def deprecated(func):
return func(*args, **kwargs)
return new_func
def validate_table_name(name: str):
"""Verify the table name is valid."""
native_validate_table_name(name)

View File

@@ -0,0 +1,162 @@
import shutil
# --8<-- [start:imports]
import lancedb
import pandas as pd
import pyarrow as pa
# --8<-- [end:imports]
import pytest
from numpy.random import randint, random
shutil.rmtree("data/sample-lancedb", ignore_errors=True)
def test_quickstart():
# --8<-- [start:connect]
uri = "data/sample-lancedb"
db = lancedb.connect(uri)
# --8<-- [end:connect]
# --8<-- [start:create_table]
data = [
{"vector": [3.1, 4.1], "item": "foo", "price": 10.0},
{"vector": [5.9, 26.5], "item": "bar", "price": 20.0},
]
# Synchronous client
tbl = db.create_table("my_table", data=data)
# --8<-- [end:create_table]
# --8<-- [start:create_table_pandas]
df = pd.DataFrame(
[
{"vector": [3.1, 4.1], "item": "foo", "price": 10.0},
{"vector": [5.9, 26.5], "item": "bar", "price": 20.0},
]
)
# Synchronous client
tbl = db.create_table("table_from_df", data=df)
# --8<-- [end:create_table_pandas]
# --8<-- [start:create_empty_table]
schema = pa.schema([pa.field("vector", pa.list_(pa.float32(), list_size=2))])
# Synchronous client
tbl = db.create_table("empty_table", schema=schema)
# --8<-- [end:create_empty_table]
# --8<-- [start:open_table]
# Synchronous client
tbl = db.open_table("my_table")
# --8<-- [end:open_table]
# --8<-- [start:table_names]
# Synchronous client
print(db.table_names())
# --8<-- [end:table_names]
# Synchronous client
# --8<-- [start:add_data]
# Option 1: Add a list of dicts to a table
data = [
{"vector": [1.3, 1.4], "item": "fizz", "price": 100.0},
{"vector": [9.5, 56.2], "item": "buzz", "price": 200.0},
]
tbl.add(data)
# Option 2: Add a pandas DataFrame to a table
df = pd.DataFrame(data)
tbl.add(df)
# --8<-- [end:add_data]
# --8<-- [start:vector_search]
# Synchronous client
tbl.search([100, 100]).limit(2).to_pandas()
# --8<-- [end:vector_search]
tbl.add(
[
{"vector": random(2), "item": "autogen", "price": randint(100)}
for _ in range(1000)
]
)
# --8<-- [start:create_index]
# Synchronous client
tbl.create_index(num_sub_vectors=1)
# --8<-- [end:create_index]
# --8<-- [start:delete_rows]
# Synchronous client
tbl.delete('item = "fizz"')
# --8<-- [end:delete_rows]
# --8<-- [start:drop_table]
# Synchronous client
db.drop_table("my_table")
# --8<-- [end:drop_table]
@pytest.mark.asyncio
async def test_quickstart_async():
# --8<-- [start:connect_async]
# LanceDB offers both a synchronous and an asynchronous client. There are still a
# few operations that are only supported by the synchronous client (e.g. embedding
# functions, full-text search), but both APIs should soon be equivalent.
# In this guide we will give examples of both clients. In other guides we will
# typically only provide examples with one client or the other.
uri = "data/sample-lancedb"
async_db = await lancedb.connect_async(uri)
# --8<-- [end:connect_async]
data = [
{"vector": [3.1, 4.1], "item": "foo", "price": 10.0},
{"vector": [5.9, 26.5], "item": "bar", "price": 20.0},
]
# --8<-- [start:create_table_async]
# Asynchronous client
async_tbl = await async_db.create_table("my_table2", data=data)
# --8<-- [end:create_table_async]
df = pd.DataFrame(
[
{"vector": [3.1, 4.1], "item": "foo", "price": 10.0},
{"vector": [5.9, 26.5], "item": "bar", "price": 20.0},
]
)
# --8<-- [start:create_table_async_pandas]
# Asynchronous client
async_tbl = await async_db.create_table("table_from_df2", df)
# --8<-- [end:create_table_async_pandas]
schema = pa.schema([pa.field("vector", pa.list_(pa.float32(), list_size=2))])
# --8<-- [start:create_empty_table_async]
# Asynchronous client
async_tbl = await async_db.create_table("empty_table2", schema=schema)
# --8<-- [end:create_empty_table_async]
# --8<-- [start:open_table_async]
# Asynchronous client
async_tbl = await async_db.open_table("my_table2")
# --8<-- [end:open_table_async]
# --8<-- [start:table_names_async]
# Asynchronous client
print(await async_db.table_names())
# --8<-- [end:table_names_async]
# --8<-- [start:add_data_async]
# Asynchronous client
await async_tbl.add(data)
# --8<-- [end:add_data_async]
# Add sufficient data for training
data = [{"vector": [x, x], "item": "filler", "price": x * x} for x in range(1000)]
await async_tbl.add(data)
# --8<-- [start:vector_search_async]
# Asynchronous client
await async_tbl.vector_search([100, 100]).limit(2).to_pandas()
# --8<-- [end:vector_search_async]
# --8<-- [start:create_index_async]
# Asynchronous client (must specify column to index)
await async_tbl.create_index("vector")
# --8<-- [end:create_index_async]
# --8<-- [start:delete_rows_async]
# Asynchronous client
await async_tbl.delete('item = "fizz"')
# --8<-- [end:delete_rows_async]
# --8<-- [start:drop_table_async]
# Asynchronous client
await async_db.drop_table("my_table2")
# --8<-- [end:drop_table_async]

View File

@@ -521,3 +521,15 @@ def test_prefilter_with_index(tmp_path):
.to_arrow()
)
assert table.num_rows == 1
def test_create_table_with_invalid_names(tmp_path):
db = lancedb.connect(uri=tmp_path)
data = [{"vector": np.random.rand(128), "item": "foo"} for i in range(10)]
with pytest.raises(ValueError):
db.create_table("foo/bar", data)
with pytest.raises(ValueError):
db.create_table("foo bar", data)
with pytest.raises(ValueError):
db.create_table("foo$$bar", data)
db.create_table("foo.bar", data)

View File

@@ -0,0 +1,45 @@
import uuid
import pytest
from lancedb.embeddings import get_registry
from lancedb.embeddings.fine_tuner import QADataset, TextChunk
from tqdm import tqdm
@pytest.mark.slow
def test_finetuning_sentence_transformers(tmp_path):
queries = {}
relevant_docs = {}
chunks = [
"This is a chunk related to legal docs",
"This is another chunk related financial docs",
"This is a chunk related to sports docs",
"This is another chunk related to fashion docs",
]
text_chunks = [TextChunk.from_chunk(chunk) for chunk in chunks]
for chunk in tqdm(text_chunks):
questions = [
"What is this chunk about?",
"What is the main topic of this chunk?",
]
for question in questions:
question_id = str(uuid.uuid4())
queries[question_id] = question
relevant_docs[question_id] = [chunk.id]
ds = QADataset.from_responses(text_chunks, queries, relevant_docs)
assert len(ds.queries) == 8
assert len(ds.corpus) == 4
model = get_registry().get("sentence-transformers").create()
model.finetune(trainset=ds, valset=ds, path=str(tmp_path / "model"), epochs=1)
model = (
get_registry().get("sentence-transformers").create(name=str(tmp_path / "model"))
)
res = model.evaluate(ds)
assert res is not None
def test_text_chunk():
# TODO
pass

View File

@@ -137,6 +137,21 @@ impl Connection {
Ok(Table::new(table))
})
}
pub fn drop_table(self_: PyRef<'_, Self>, name: String) -> PyResult<&PyAny> {
let inner = self_.get_inner()?.clone();
future_into_py(self_.py(), async move {
inner.drop_table(name).await.infer_error()
})
}
pub fn drop_db(self_: PyRef<'_, Self>) -> PyResult<&PyAny> {
let inner = self_.get_inner()?.clone();
future_into_py(
self_.py(),
async move { inner.drop_db().await.infer_error() },
)
}
}
#[pyfunction]

View File

@@ -42,6 +42,7 @@ pub fn _lancedb(_py: Python, m: &PyModule) -> PyResult<()> {
m.add_class::<VectorQuery>()?;
m.add_class::<RecordBatchStream>()?;
m.add_function(wrap_pyfunction!(connect, m)?)?;
m.add_function(wrap_pyfunction!(util::validate_table_name, m)?)?;
m.add("__version__", env!("CARGO_PKG_VERSION"))?;
Ok(())
}

View File

@@ -80,6 +80,13 @@ impl Table {
})
}
pub fn delete<'a>(self_: PyRef<'a, Self>, condition: String) -> PyResult<&'a PyAny> {
let inner = self_.inner_ref()?.clone();
future_into_py(self_.py(), async move {
inner.delete(&condition).await.infer_error()
})
}
pub fn update<'a>(
self_: PyRef<'a, Self>,
updates: &PyDict,

View File

@@ -3,7 +3,7 @@ use std::sync::Mutex;
use lancedb::DistanceType;
use pyo3::{
exceptions::{PyRuntimeError, PyValueError},
PyResult,
pyfunction, PyResult,
};
/// A wrapper around a rust builder
@@ -49,3 +49,9 @@ pub fn parse_distance_type(distance_type: impl AsRef<str>) -> PyResult<DistanceT
))),
}
}
#[pyfunction]
pub(crate) fn validate_table_name(table_name: &str) -> PyResult<()> {
lancedb::utils::validate_table_name(table_name)
.map_err(|e| PyValueError::new_err(e.to_string()))
}

View File

@@ -1,6 +1,6 @@
[package]
name = "lancedb-node"
version = "0.4.13"
version = "0.4.14"
description = "Serverless, low-latency vector database for AI applications"
license.workspace = true
edition.workspace = true

View File

@@ -1,6 +1,6 @@
[package]
name = "lancedb"
version = "0.4.13"
version = "0.4.14"
edition.workspace = true
description = "LanceDB: A serverless, low-latency vector database for AI applications"
license.workspace = true
@@ -22,6 +22,7 @@ chrono = { workspace = true }
object_store = { workspace = true }
snafu = { workspace = true }
half = { workspace = true }
lazy_static.workspace = true
lance = { workspace = true }
lance-index = { workspace = true }
lance-linalg = { workspace = true }
@@ -34,11 +35,10 @@ bytes = "1"
futures.workspace = true
num-traits.workspace = true
url.workspace = true
regex.workspace = true
serde = { version = "^1" }
serde_json = { version = "1" }
# For remote feature
reqwest = { version = "0.11.24", features = ["gzip", "json"], optional = true }
[dev-dependencies]

View File

@@ -31,6 +31,7 @@ use crate::arrow::IntoArrow;
use crate::error::{CreateDirSnafu, Error, InvalidTableNameSnafu, Result};
use crate::io::object_store::MirroringObjectStoreWrapper;
use crate::table::{NativeTable, WriteOptions};
use crate::utils::validate_table_name;
use crate::Table;
pub const LANCE_FILE_EXTENSION: &str = "lance";
@@ -675,13 +676,18 @@ impl Database {
/// Get the URI of a table in the database.
fn table_uri(&self, name: &str) -> Result<String> {
validate_table_name(name)?;
let path = Path::new(&self.uri);
let table_uri = path.join(format!("{}.{}", name, LANCE_FILE_EXTENSION));
let mut uri = table_uri
.as_path()
.to_str()
.context(InvalidTableNameSnafu { name })?
.context(InvalidTableNameSnafu {
name,
reason: "Name is not valid URL",
})?
.to_string();
// If there are query string set on the connection, propagate to lance

View File

@@ -20,8 +20,8 @@ use snafu::Snafu;
#[derive(Debug, Snafu)]
#[snafu(visibility(pub(crate)))]
pub enum Error {
#[snafu(display("Invalid table name: {name}"))]
InvalidTableName { name: String },
#[snafu(display("Invalid table name (\"{name}\"): {reason}"))]
InvalidTableName { name: String, reason: String },
#[snafu(display("Invalid input, {message}"))]
InvalidInput { message: String },
#[snafu(display("Table '{name}' was not found"))]

View File

@@ -230,9 +230,9 @@ pub enum DistanceType {
impl From<DistanceType> for LanceDistanceType {
fn from(value: DistanceType) -> Self {
match value {
DistanceType::L2 => LanceDistanceType::L2,
DistanceType::Cosine => LanceDistanceType::Cosine,
DistanceType::Dot => LanceDistanceType::Dot,
DistanceType::L2 => Self::L2,
DistanceType::Cosine => Self::Cosine,
DistanceType::Dot => Self::Dot,
}
}
}
@@ -240,9 +240,9 @@ impl From<DistanceType> for LanceDistanceType {
impl From<LanceDistanceType> for DistanceType {
fn from(value: LanceDistanceType) -> Self {
match value {
LanceDistanceType::L2 => DistanceType::L2,
LanceDistanceType::Cosine => DistanceType::Cosine,
LanceDistanceType::Dot => DistanceType::Dot,
LanceDistanceType::L2 => Self::L2,
LanceDistanceType::Cosine => Self::Cosine,
LanceDistanceType::Dot => Self::Dot,
}
}
}
@@ -251,7 +251,7 @@ impl<'a> TryFrom<&'a str> for DistanceType {
type Error = <LanceDistanceType as TryFrom<&'a str>>::Error;
fn try_from(value: &str) -> std::prelude::v1::Result<Self, Self::Error> {
LanceDistanceType::try_from(value).map(DistanceType::from)
LanceDistanceType::try_from(value).map(Self::from)
}
}

View File

@@ -854,6 +854,7 @@ impl NativeTable {
.to_str()
.ok_or(Error::InvalidTableName {
name: uri.to_string(),
reason: "Table name is not valid URL".to_string(),
})?;
Ok(name.to_string())
}
@@ -1197,7 +1198,7 @@ impl NativeTable {
if dim != query_vector.len() as i32 {
return Err(Error::InvalidInput {
message: format!(
"The dimension of the query vector does not match with the dimension of the vector column '{}':
"The dimension of the query vector does not match with the dimension of the vector column '{}':
query dim={}, expected vector dim={}",
column,
query_vector.len(),

View File

@@ -1,12 +1,30 @@
// Copyright 2024 LanceDB Developers.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::sync::Arc;
use arrow_schema::Schema;
use lance::dataset::{ReadParams, WriteParams};
use lance::io::{ObjectStoreParams, WrappingObjectStore};
use lazy_static::lazy_static;
use crate::error::{Error, Result};
lazy_static! {
static ref TABLE_NAME_REGEX: regex::Regex = regex::Regex::new(r"^[a-zA-Z0-9_\-\.]+$").unwrap();
}
pub trait PatchStoreParam {
fn patch_with_store_wrapper(
self,
@@ -64,6 +82,25 @@ impl PatchReadParam for ReadParams {
}
}
/// Validate table name.
pub fn validate_table_name(name: &str) -> Result<()> {
if name.is_empty() {
return Err(Error::InvalidTableName {
name: name.to_string(),
reason: "Table names cannot be empty strings".to_string(),
});
}
if !TABLE_NAME_REGEX.is_match(name) {
return Err(Error::InvalidTableName {
name: name.to_string(),
reason:
"Table names can only contain alphanumeric characters, underscores, hyphens, and periods"
.to_string(),
});
}
Ok(())
}
/// Find one default column to create index.
pub(crate) fn default_vector_column(schema: &Schema, dim: Option<i32>) -> Result<String> {
// Try to find one fixed size list array column.
@@ -145,4 +182,20 @@ mod tests {
.to_string()
.contains("More than one"));
}
#[test]
fn test_validate_table_name() {
assert!(validate_table_name("my_table").is_ok());
assert!(validate_table_name("my_table_1").is_ok());
assert!(validate_table_name("123mytable").is_ok());
assert!(validate_table_name("_12345table").is_ok());
assert!(validate_table_name("table.12345").is_ok());
assert!(validate_table_name("table.._dot_..12345").is_ok());
assert!(validate_table_name("").is_err());
assert!(validate_table_name("my_table!").is_err());
assert!(validate_table_name("my/table").is_err());
assert!(validate_table_name("my@table").is_err());
assert!(validate_table_name("name with space").is_err());
}
}