patch

2025-12-23 05:19:58 +00:00 · 2024-05-15 13:44:27 -04:00
106 changed files with 8399 additions and 11740 deletions
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -0,0 +1,22 @@
+[bumpversion]
+current_version = 0.4.20
+commit = True
+message = Bump version: {current_version} → {new_version}
+tag = True
+tag_name = v{new_version}
+
+[bumpversion:file:node/package.json]
+
+[bumpversion:file:nodejs/package.json]
+
+[bumpversion:file:nodejs/npm/darwin-x64/package.json]
+
+[bumpversion:file:nodejs/npm/darwin-arm64/package.json]
+
+[bumpversion:file:nodejs/npm/linux-x64-gnu/package.json]
+
+[bumpversion:file:nodejs/npm/linux-arm64-gnu/package.json]
+
+[bumpversion:file:rust/ffi/node/Cargo.toml]
+
+[bumpversion:file:rust/lancedb/Cargo.toml]
--- a/.bumpversion.toml
+++ b/.bumpversion.toml
@@ -1,57 +0,0 @@
-[tool.bumpversion]
-current_version = "0.5.0"
-parse = """(?x)
-    (?P<major>0|[1-9]\\d*)\\.
-    (?P<minor>0|[1-9]\\d*)\\.
-    (?P<patch>0|[1-9]\\d*)
-    (?:-(?P<pre_l>[a-zA-Z-]+)\\.(?P<pre_n>0|[1-9]\\d*))?
-"""
-serialize = [
-    "{major}.{minor}.{patch}-{pre_l}.{pre_n}",
-    "{major}.{minor}.{patch}",
-]
-search = "{current_version}"
-replace = "{new_version}"
-regex = false
-ignore_missing_version = false
-ignore_missing_files = false
-tag = true
-sign_tags = false
-tag_name = "v{new_version}"
-tag_message = "Bump version: {current_version} → {new_version}"
-allow_dirty = true
-commit = true
-message = "Bump version: {current_version} → {new_version}"
-commit_args = ""
-
-[tool.bumpversion.parts.pre_l]
-values = ["beta", "final"]
-optional_value = "final"
-
-[[tool.bumpversion.files]]
-filename = "node/package.json"
-search = "\"version\": \"{current_version}\","
-replace = "\"version\": \"{new_version}\","
-
-[[tool.bumpversion.files]]
-filename = "nodejs/package.json"
-search = "\"version\": \"{current_version}\","
-replace = "\"version\": \"{new_version}\","
-
-# nodejs binary packages
-[[tool.bumpversion.files]]
-glob = "nodejs/npm/*/package.json"
-search = "\"version\": \"{current_version}\","
-replace = "\"version\": \"{new_version}\","
-
-# Cargo files
-# ------------
-[[tool.bumpversion.files]]
-filename = "rust/ffi/node/Cargo.toml"
-search = "\nversion = \"{current_version}\""
-replace = "\nversion = \"{new_version}\""
-
-[[tool.bumpversion.files]]
-filename = "rust/lancedb/Cargo.toml"
-search = "\nversion = \"{current_version}\""
-replace = "\nversion = \"{new_version}\""
--- a/.github/release.yml
+++ b/.github/release.yml
@@ -0,0 +1,25 @@
+# TODO: create separate templates for Python and other releases.
+changelog:
+  exclude:
+    labels:
+      - ci
+      - chore
+  categories:
+    - title: Breaking Changes 🛠
+      labels:
+        - breaking-change
+    - title: New Features 🎉
+      labels:
+        - enhancement
+    - title: Bug Fixes 🐛
+      labels:
+        - bug
+    - title: Documentation 📚
+      labels:
+        - documentation
+    - title: Performance Improvements 🚀
+      labels:
+        - performance
+    - title: Other Changes
+      labels:
+        - "*"
--- a/.github/release_notes.json
+++ b/.github/release_notes.json
@@ -1,41 +0,0 @@
-{
-    "ignore_labels": ["chore"],
-    "pr_template": "- ${{TITLE}} by @${{AUTHOR}} in ${{URL}}",
-    "categories": [
-        {
-            "title": "## 🏆 Highlights",
-            "labels": ["highlight"]
-        },
-        {
-            "title": "## 🛠 Breaking Changes",
-            "labels": ["breaking-change"]
-        },
-        {
-            "title": "## ⚠️ Deprecations ",
-            "labels": ["deprecation"]
-        },
-        {
-            "title": "## 🎉 New Features",
-            "labels": ["enhancement"]
-        },
-        {
-            "title": "## 🐛 Bug Fixes",
-            "labels": ["bug"]
-        },
-        {
-            "title": "## 📚 Documentation",
-            "labels": ["documentation"]
-        },
-        {
-            "title": "## 🚀 Performance Improvements",
-            "labels": ["performance"]
-        },
-        {
-            "title": "## Other Changes"
-        },
-        {
-            "title": "## 🔧 Build and CI",
-            "labels": ["ci"]
-        }
-    ]
-}
--- a/.github/workflows/cargo-publish.yml
+++ b/.github/workflows/cargo-publish.yml
@@ -1,12 +1,8 @@
 name: Cargo Publish

 on:
-  push:
-    tags-ignore:
-      # We don't publish pre-releases for Rust. Crates.io is just a source
-      # distribution, so we don't need to publish pre-releases.
-      - 'v*-beta*'
-      - '*-v*' # for example, python-vX.Y.Z
+  release:
+    types: [ published ]

 env:
  # This env var is used by Swatinem/rust-cache@v2 for the cache
--- a/.github/workflows/java.yml
+++ b/.github/workflows/java.yml
@@ -1,85 +0,0 @@
-name: Build and Run Java JNI Tests
-on:
-  push:
-    branches:
-      - main
-  pull_request:
-    paths:
-      - java/**
-      - rust/**
-      - .github/workflows/java.yml
-env:
-  # This env var is used by Swatinem/rust-cache@v2 for the cache
-  # key, so we set it to make sure it is always consistent.
-  CARGO_TERM_COLOR: always
-  # Disable full debug symbol generation to speed up CI build and keep memory down
-  # "1" means line tables only, which is useful for panic tracebacks.
-  RUSTFLAGS: "-C debuginfo=1"
-  RUST_BACKTRACE: "1"
-  # according to: https://matklad.github.io/2021/09/04/fast-rust-builds.html
-  # CI builds are faster with incremental disabled.
-  CARGO_INCREMENTAL: "0"
-  CARGO_BUILD_JOBS: "1"
-jobs:
-  linux-build:
-    runs-on: ubuntu-22.04
-    name: ubuntu-22.04 + Java 11 & 17
-    defaults:
-      run:
-        working-directory: ./java
-    steps:
-      - name: Checkout repository
-        uses: actions/checkout@v4
-      - uses: Swatinem/rust-cache@v2
-        with:
-          workspaces: java/core/lancedb-jni
-      - name: Run cargo fmt
-        run: cargo fmt --check
-        working-directory: ./java/core/lancedb-jni
-      - name: Install dependencies
-        run: |
-          sudo apt update
-          sudo apt install -y protobuf-compiler libssl-dev
-      - name: Install Java 17
-        uses: actions/setup-java@v4
-        with:
-          distribution: temurin
-          java-version: 17
-          cache: "maven"
-      - run: echo "JAVA_17=$JAVA_HOME" >> $GITHUB_ENV
-      - name: Install Java 11
-        uses: actions/setup-java@v4
-        with:
-          distribution: temurin
-          java-version: 11
-          cache: "maven"
-      - name: Java Style Check
-        run: mvn checkstyle:check
-      # Disable because of issues in lancedb rust core code
-      # - name: Rust Clippy
-      #   working-directory: java/core/lancedb-jni
-      #   run: cargo clippy --all-targets -- -D warnings
-      - name: Running tests with Java 11
-        run: mvn clean test
-      - name: Running tests with Java 17
-        run: |
-          export JAVA_TOOL_OPTIONS="$JAVA_TOOL_OPTIONS \
-          -XX:+IgnoreUnrecognizedVMOptions \
-          --add-opens=java.base/java.lang=ALL-UNNAMED \
-          --add-opens=java.base/java.lang.invoke=ALL-UNNAMED \
-          --add-opens=java.base/java.lang.reflect=ALL-UNNAMED \
-          --add-opens=java.base/java.io=ALL-UNNAMED \
-          --add-opens=java.base/java.net=ALL-UNNAMED \
-          --add-opens=java.base/java.nio=ALL-UNNAMED \
-          --add-opens=java.base/java.util=ALL-UNNAMED \
-          --add-opens=java.base/java.util.concurrent=ALL-UNNAMED \
-          --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED \
-          --add-opens=java.base/jdk.internal.ref=ALL-UNNAMED \
-          --add-opens=java.base/sun.nio.ch=ALL-UNNAMED \
-          --add-opens=java.base/sun.nio.cs=ALL-UNNAMED \
-          --add-opens=java.base/sun.security.action=ALL-UNNAMED \
-          --add-opens=java.base/sun.util.calendar=ALL-UNNAMED \
-          --add-opens=java.security.jgss/sun.security.krb5=ALL-UNNAMED \
-          -Djdk.reflect.useDirectMethodHandle=false \
-          -Dio.netty.tryReflectionSetAccessible=true"
-          JAVA_HOME=$JAVA_17 mvn clean test
--- a/.github/workflows/make-release-commit.yml
+++ b/.github/workflows/make-release-commit.yml
@@ -1,62 +1,37 @@
 name: Create release commit

-# This workflow increments versions, tags the version, and pushes it.
-# When a tag is pushed, another workflow is triggered that creates a GH release
-# and uploads the binaries. This workflow is only for creating the tag.
-
-# This script will enforce that a minor version is incremented if there are any
-# breaking changes since the last minor increment. However, it isn't able to
-# differentiate between breaking changes in Node versus Python. If you wish to
-# bypass this check, you can manually increment the version and push the tag.
 on:
  workflow_dispatch:
    inputs:
      dry_run:
        description: 'Dry run (create the local commit/tags but do not push it)'
        required: true
-        default: false
-        type: boolean
-      type:
-        description: 'What kind of release is this?'
-        required: true
-        default: 'preview'
+        default: "false"
        type: choice
        options:
-          - preview
-          - stable
-      python:
-        description: 'Make a Python release'
+          - "true"
+          - "false"
+      part:
+        description: 'What kind of release is this?'
        required: true
-        default: true
-        type: boolean
-      other:
-        description: 'Make a Node/Rust release'
-        required: true
-        default: true
-        type: boolean
-      bump-minor:
-        description: 'Bump minor version'
-        required: true
-        default: false
-        type: boolean
+        default: 'patch'
+        type: choice
+        options:
+          - patch
+          - minor
+          - major

 jobs:
-  make-release:
-    # Creates tag and GH release. The GH release will trigger the build and release jobs.
+  bump-version:
    runs-on: ubuntu-latest
-    permissions:
-      contents: write
    steps:
-      - name: Output Inputs
-        run: echo "${{ toJSON(github.event.inputs) }}"
-      - uses: actions/checkout@v4
+      - name: Check out main
+        uses: actions/checkout@v4
        with:
+          ref: main
+          persist-credentials: false
          fetch-depth: 0
          lfs: true
-          # It's important we use our token here, as the default token will NOT
-          # trigger any workflows watching for new tags. See:
-          # https://docs.github.com/en/actions/using-workflows/triggering-a-workflow#triggering-a-workflow-from-a-workflow
-          token: ${{ secrets.LANCEDB_RELEASE_TOKEN }}
      - name: Set git configs for bumpversion
        shell: bash
        run: |
@@ -66,34 +41,19 @@ jobs:
        uses: actions/setup-python@v5
        with:
          python-version: "3.11"
-      - name: Bump Python version
-        if: ${{ inputs.python }}
-        working-directory: python
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+      - name: Bump version, create tag and commit
        run: |
-          # Need to get the commit before bumping the version, so we can
-          # determine if there are breaking changes in the next step as well.
-          echo "COMMIT_BEFORE_BUMP=$(git rev-parse HEAD)" >> $GITHUB_ENV
-
-          pip install bump-my-version PyGithub packaging
-          bash ../ci/bump_version.sh ${{ inputs.type }} ${{ inputs.bump-minor }} python-v
-      - name: Bump Node/Rust version
-        if: ${{ inputs.other }}
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-        run: |
-          pip install bump-my-version PyGithub packaging
-          bash ci/bump_version.sh ${{ inputs.type }} ${{ inputs.bump-minor }} v $COMMIT_BEFORE_BUMP
-      - name: Push new version tag
-        if: ${{ !inputs.dry_run }}
+          pip install bump2version
+          bumpversion --verbose ${{ inputs.part }}
+      - name: Push new version and tag
+        if: ${{ inputs.dry_run == 'false' }}  # compare inside the expression; text after the closing braces makes the condition a non-empty, always-true string
        uses: ad-m/github-push-action@master
        with:
-          # Need to use PAT here too to trigger next workflow. See comment above.
          github_token: ${{ secrets.LANCEDB_RELEASE_TOKEN }}
-          branch: ${{ github.ref }}
+          branch: main
          tags: true
      - uses: ./.github/workflows/update_package_lock
        if: ${{ inputs.dry_run }} == "false"
        with:
-          github_token: ${{ secrets.GITHUB_TOKEN }}
+          github_token: ${{ secrets.LANCEDB_RELEASE_TOKEN }}
+
--- a/.github/workflows/npm-publish.yml
+++ b/.github/workflows/npm-publish.yml
@@ -1,9 +1,8 @@
 name: NPM Publish

 on:
-  push:
-    tags:
-      - 'v*'
+  release:
+    types: [published]

 jobs:
  node:
@@ -275,15 +274,9 @@ jobs:
        env:
          NODE_AUTH_TOKEN: ${{ secrets.LANCEDB_NPM_REGISTRY_TOKEN }}
        run: |
-          # Tag beta as "preview" instead of default "latest". See lancedb 
-          # npm publish step for more info.
-          if [[ $GITHUB_REF =~ refs/tags/v(.*)-beta.* ]]; then
-            PUBLISH_ARGS="--tag preview"
-          fi
-
          mv */*.tgz .
          for filename in *.tgz; do
-            npm publish $PUBLISH_ARGS $filename
+            npm publish $filename
          done

  release-nodejs:
@@ -323,23 +316,11 @@ jobs:
      - name: Publish to NPM
        env:
          NODE_AUTH_TOKEN: ${{ secrets.LANCEDB_NPM_REGISTRY_TOKEN }}
-        # By default, things are published to the latest tag. This is what is 
-        # installed by default if the user does not specify a version. This is
-        # good for stable releases, but for pre-releases, we want to publish to
-        # the "preview" tag so they can install with `npm install lancedb@preview`.
-        # See: https://medium.com/@mbostock/prereleases-and-npm-e778fc5e2420
-        run: |
-          if [[ $GITHUB_REF =~ refs/tags/v(.*)-beta.* ]]; then
-            npm publish --access public --tag preview
-          else
-            npm publish --access public
-          fi
+        run: npm publish --access public

  update-package-lock:
    needs: [release]
    runs-on: ubuntu-latest
-    permissions:
-      contents: write
    steps:
      - name: Checkout
        uses: actions/checkout@v4
@@ -350,13 +331,11 @@ jobs:
          lfs: true
      - uses: ./.github/workflows/update_package_lock
        with:
-          github_token: ${{ secrets.GITHUB_TOKEN }}
+          github_token: ${{ secrets.LANCEDB_RELEASE_TOKEN }}

  update-package-lock-nodejs:
    needs: [release-nodejs]
    runs-on: ubuntu-latest
-    permissions:
-      contents: write
    steps:
      - name: Checkout
        uses: actions/checkout@v4
@@ -367,70 +346,4 @@ jobs:
          lfs: true
      - uses: ./.github/workflows/update_package_lock_nodejs
        with:
-          github_token: ${{ secrets.GITHUB_TOKEN }}
-  
-  gh-release:
-    runs-on: ubuntu-latest
-    permissions:
-      contents: write
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          fetch-depth: 0
-          lfs: true
-      - name: Extract version
-        id: extract_version
-        env:
-          GITHUB_REF: ${{ github.ref }}
-        run: |
-          set -e
-          echo "Extracting tag and version from $GITHUB_REF"
-          if [[ $GITHUB_REF =~ refs/tags/v(.*) ]]; then
-            VERSION=${BASH_REMATCH[1]}
-            TAG=v$VERSION
-            echo "tag=$TAG" >> $GITHUB_OUTPUT
-            echo "version=$VERSION" >> $GITHUB_OUTPUT
-          else
-            echo "Failed to extract version from $GITHUB_REF"
-            exit 1
-          fi
-          echo "Extracted version $VERSION from $GITHUB_REF"
-          if [[ $VERSION =~ beta ]]; then
-            echo "This is a beta release"
-
-            # Get last release (that is not this one)
-            FROM_TAG=$(git tag --sort='version:refname' \
-              | grep ^v \
-              | grep -vF "$TAG" \
-              | python ci/semver_sort.py v \
-              | tail -n 1)
-          else
-            echo "This is a stable release"
-            # Get last stable tag (ignore betas)
-            FROM_TAG=$(git tag --sort='version:refname' \
-              | grep ^v \
-              | grep -vF "$TAG" \
-              | grep -v beta \
-              | python ci/semver_sort.py v \
-              | tail -n 1)
-          fi
-          echo "Found from tag $FROM_TAG"
-          echo "from_tag=$FROM_TAG" >> $GITHUB_OUTPUT
-      - name: Create Release Notes
-        id: release_notes
-        uses: mikepenz/release-changelog-builder-action@v4
-        with:
-          configuration: .github/release_notes.json
-          toTag: ${{ steps.extract_version.outputs.tag }}
-          fromTag: ${{ steps.extract_version.outputs.from_tag }}
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-      - name: Create GH release
-        uses: softprops/action-gh-release@v2
-        with:
-          prerelease: ${{ contains('beta', github.ref) }}
-          tag_name: ${{ steps.extract_version.outputs.tag }}
-          token: ${{ secrets.GITHUB_TOKEN }}
-          generate_release_notes: false
-          name: Node/Rust LanceDB v${{ steps.extract_version.outputs.version }}
-          body: ${{ steps.release_notes.outputs.changelog }}
+          github_token: ${{ secrets.LANCEDB_RELEASE_TOKEN }}
--- a/.github/workflows/pypi-publish.yml
+++ b/.github/workflows/pypi-publish.yml
@@ -1,16 +1,18 @@
 name: PyPI Publish

 on:
-  push:
-    tags:
-      - 'python-v*'
+  release:
+    types: [published]

 jobs:
  linux:
+    # Only runs on tags that match the python-make-release action
+    if: startsWith(github.ref, 'refs/tags/python-v')
    name: Python ${{ matrix.config.platform }} manylinux${{ matrix.config.manylinux }}
    timeout-minutes: 60
    strategy:
      matrix:
+        python-minor-version: ["8"]
        config:
          - platform: x86_64
            manylinux: "2_17"
@@ -32,22 +34,25 @@ jobs:
      - name: Set up Python
        uses: actions/setup-python@v4
        with:
-          python-version: 3.8
+          python-version: 3.${{ matrix.python-minor-version }}
      - uses: ./.github/workflows/build_linux_wheel
        with:
-          python-minor-version: 8
+          python-minor-version: ${{ matrix.python-minor-version }}
          args: "--release --strip ${{ matrix.config.extra_args }}"
          arm-build: ${{ matrix.config.platform == 'aarch64' }}
          manylinux: ${{ matrix.config.manylinux }}
      - uses: ./.github/workflows/upload_wheel
        with:
-          pypi_token: ${{ secrets.LANCEDB_PYPI_API_TOKEN }}
-          fury_token: ${{ secrets.FURY_TOKEN }}
+          token: ${{ secrets.LANCEDB_PYPI_API_TOKEN }}
+          repo: "pypi"
  mac:
+    # Only runs on tags that match the python-make-release action
+    if: startsWith(github.ref, 'refs/tags/python-v')
    timeout-minutes: 60
    runs-on: ${{ matrix.config.runner }}
    strategy:
      matrix:
+        python-minor-version: ["8"]
        config:
          - target: x86_64-apple-darwin
            runner: macos-13
@@ -58,6 +63,7 @@ jobs:
    steps:
      - uses: actions/checkout@v4
        with:
+          ref: ${{ inputs.ref }}
          fetch-depth: 0
          lfs: true
      - name: Set up Python
@@ -66,95 +72,38 @@ jobs:
          python-version: 3.12
      - uses: ./.github/workflows/build_mac_wheel
        with:
-          python-minor-version: 8
+          python-minor-version: ${{ matrix.python-minor-version }}
          args: "--release --strip --target ${{ matrix.config.target }} --features fp16kernels"
      - uses: ./.github/workflows/upload_wheel
        with:
-          pypi_token: ${{ secrets.LANCEDB_PYPI_API_TOKEN }}
-          fury_token: ${{ secrets.FURY_TOKEN }}
+          python-minor-version: ${{ matrix.python-minor-version }}
+          token: ${{ secrets.LANCEDB_PYPI_API_TOKEN }}
+          repo: "pypi"
  windows:
+    # Only runs on tags that match the python-make-release action
+    if: startsWith(github.ref, 'refs/tags/python-v')
    timeout-minutes: 60
    runs-on: windows-latest
+    strategy:
+      matrix:
+        python-minor-version: ["8"]
    steps:
      - uses: actions/checkout@v4
        with:
+          ref: ${{ inputs.ref }}
          fetch-depth: 0
          lfs: true
      - name: Set up Python
        uses: actions/setup-python@v4
        with:
-          python-version: 3.8
+          python-version: 3.${{ matrix.python-minor-version }}
      - uses: ./.github/workflows/build_windows_wheel
        with:
-          python-minor-version: 8
+          python-minor-version: ${{ matrix.python-minor-version }}
          args: "--release --strip"
          vcpkg_token: ${{ secrets.VCPKG_GITHUB_PACKAGES }}
      - uses: ./.github/workflows/upload_wheel
        with:
-          pypi_token: ${{ secrets.LANCEDB_PYPI_API_TOKEN }}
-          fury_token: ${{ secrets.FURY_TOKEN }}
-  gh-release:
-    runs-on: ubuntu-latest
-    permissions:
-      contents: write
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          fetch-depth: 0
-          lfs: true
-      - name: Extract version
-        id: extract_version
-        env:
-          GITHUB_REF: ${{ github.ref }}
-        run: |
-          set -e
-          echo "Extracting tag and version from $GITHUB_REF"
-          if [[ $GITHUB_REF =~ refs/tags/python-v(.*) ]]; then
-            VERSION=${BASH_REMATCH[1]}
-            TAG=python-v$VERSION
-            echo "tag=$TAG" >> $GITHUB_OUTPUT
-            echo "version=$VERSION" >> $GITHUB_OUTPUT
-          else
-            echo "Failed to extract version from $GITHUB_REF"
-            exit 1
-          fi
-          echo "Extracted version $VERSION from $GITHUB_REF"
-          if [[ $VERSION =~ beta ]]; then
-            echo "This is a beta release"
-
-            # Get last release (that is not this one)
-            FROM_TAG=$(git tag --sort='version:refname' \
-              | grep ^python-v \
-              | grep -vF "$TAG" \
-              | python ci/semver_sort.py python-v \
-              | tail -n 1)
-          else
-            echo "This is a stable release"
-            # Get last stable tag (ignore betas)
-            FROM_TAG=$(git tag --sort='version:refname' \
-              | grep ^python-v \
-              | grep -vF "$TAG" \
-              | grep -v beta \
-              | python ci/semver_sort.py python-v \
-              | tail -n 1)
-          fi
-          echo "Found from tag $FROM_TAG"
-          echo "from_tag=$FROM_TAG" >> $GITHUB_OUTPUT
-      - name: Create Python Release Notes
-        id: python_release_notes
-        uses: mikepenz/release-changelog-builder-action@v4
-        with:
-          configuration: .github/release_notes.json
-          toTag: ${{ steps.extract_version.outputs.tag }}
-          fromTag: ${{ steps.extract_version.outputs.from_tag }}
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-      - name: Create Python GH release
-        uses: softprops/action-gh-release@v2
-        with:
-          prerelease: ${{ contains('beta', github.ref) }}
-          tag_name: ${{ steps.extract_version.outputs.tag }}
-          token: ${{ secrets.GITHUB_TOKEN }}
-          generate_release_notes: false
-          name: Python LanceDB v${{ steps.extract_version.outputs.version }}
-          body: ${{ steps.python_release_notes.outputs.changelog }}
+          python-minor-version: ${{ matrix.python-minor-version }}
+          token: ${{ secrets.LANCEDB_PYPI_API_TOKEN }}
+          repo: "pypi"
--- a/.github/workflows/python-make-release-commit.yml
+++ b/.github/workflows/python-make-release-commit.yml
@@ -0,0 +1,56 @@
+name: Python - Create release commit
+
+on:
+  workflow_dispatch:
+    inputs:
+      dry_run:
+        description: 'Dry run (create the local commit/tags but do not push it)'
+        required: true
+        default: "false"
+        type: choice
+        options:
+          - "true"
+          - "false"
+      part:
+        description: 'What kind of release is this?'
+        required: true
+        default: 'patch'
+        type: choice
+        options:
+          - patch
+          - minor
+          - major
+
+jobs:
+  bump-version:
+    runs-on: ubuntu-latest
+    steps:
+    - name: Check out main
+      uses: actions/checkout@v4
+      with:
+        ref: main
+        persist-credentials: false
+        fetch-depth: 0
+        lfs: true
+    - name: Set git configs for bumpversion
+      shell: bash
+      run: |
+        git config user.name 'Lance Release'
+        git config user.email 'lance-dev@lancedb.com'
+    - name: Set up Python
+      uses: actions/setup-python@v5
+      with:
+        python-version: "3.11"
+    - name: Bump version, create tag and commit
+      working-directory: python
+      run: |
+        pip install bump2version
+        bumpversion --verbose ${{ inputs.part }}
+    - name: Push new version and tag
+      if: ${{ inputs.dry_run == 'false' }}  # compare inside the expression; text after the closing braces makes the condition a non-empty, always-true string
+      uses: ad-m/github-push-action@master
+      with:
+        github_token: ${{ secrets.LANCEDB_RELEASE_TOKEN }}
+        branch: main
+        tags: true
+
--- a/.github/workflows/python.yml
+++ b/.github/workflows/python.yml
@@ -75,7 +75,7 @@ jobs:
    timeout-minutes: 30
    strategy:
      matrix:
-        python-minor-version: ["9", "11"]
+        python-minor-version: ["8", "11"]
    runs-on: "ubuntu-22.04"
    defaults:
      run:
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -74,11 +74,11 @@ jobs:
      run: |
          sudo apt update
          sudo apt install -y protobuf-compiler libssl-dev
+    - name: Build
+      run: cargo build --all-features
    - name: Start S3 integration test environment
      working-directory: .
      run: docker compose up --detach --wait
-    - name: Build
-      run: cargo build --all-features
    - name: Run tests
      run: cargo test --all-features
    - name: Run examples
--- a/.github/workflows/upload_wheel/action.yml
+++ b/.github/workflows/upload_wheel/action.yml
@@ -2,43 +2,28 @@ name: upload-wheel

 description: "Upload wheels to Pypi"
 inputs:
-  pypi_token:
+  os:
+    required: true
+    description: "ubuntu-22.04 or macos-13"
+  repo:
+    required: false
+    description: "pypi or testpypi"
+    default: "pypi"
+  token:
    required: true
    description: "release token for the repo"
-  fury_token:
-    required: true
-    description: "release token for the fury repo"

 runs:
  using: "composite"
  steps:
-  - name: Install dependencies
-    shell: bash
-    run: |
-      python -m pip install --upgrade pip
-      pip install twine
-  - name: Choose repo
-    shell: bash
-    id: choose_repo
-    run: |
-      if [ ${{ github.ref }} == "*beta*" ]; then
-        echo "repo=fury" >> $GITHUB_OUTPUT
-      else
-        echo "repo=pypi" >> $GITHUB_OUTPUT
-      fi
-  - name: Publish to PyPI
-    shell: bash
-    env:
-      FURY_TOKEN: ${{ inputs.fury_token }}
-      PYPI_TOKEN: ${{ inputs.pypi_token }}
-    run: |
-      if [ ${{ steps.choose_repo.outputs.repo }} == "fury" ]; then
-        WHEEL=$(ls target/wheels/lancedb-*.whl 2> /dev/null | head -n 1)
-        echo "Uploading $WHEEL to Fury"
-        curl -f -F package=@$WHEEL https://$FURY_TOKEN@push.fury.io/lancedb/
-      else
-        twine upload --repository ${{ steps.choose_repo.outputs.repo }} \
-          --username __token__ \
-          --password $PYPI_TOKEN \
-          target/wheels/lancedb-*.whl
-      fi
+    - name: Install dependencies
+      shell: bash
+      run: |
+        python -m pip install --upgrade pip
+        pip install twine
+    - name: Publish wheel
+      env:
+        TWINE_USERNAME: __token__
+        TWINE_PASSWORD: ${{ inputs.token }}
+      shell: bash
+      run: twine upload --repository ${{ inputs.repo }} target/wheels/lancedb-*.whl
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -14,7 +14,7 @@ repos:
  hooks:
    - id: local-biome-check
      name: biome check
-      entry: npx @biomejs/biome check --config-path nodejs/biome.json nodejs/
+      entry: npx biome check
      language: system
      types: [text]
      files: "nodejs/.*"
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,5 +1,5 @@
 [workspace]
-members = ["rust/ffi/node", "rust/lancedb", "nodejs", "python", "java/core/lancedb-jni"]
+members = ["rust/ffi/node", "rust/lancedb", "nodejs", "python"]
 # Python package needs to be built by maturin.
 exclude = ["python"]
 resolver = "2"
@@ -14,10 +14,10 @@ keywords = ["lancedb", "lance", "database", "vector", "search"]
 categories = ["database-implementations"]

 [workspace.dependencies]
-lance = { "version" = "=0.11.1", "features" = ["dynamodb"] }
-lance-index = { "version" = "=0.11.1" }
-lance-linalg = { "version" = "=0.11.1" }
-lance-testing = { "version" = "=0.11.1" }
+lance = { "version" = "=0.10.18", "features" = ["dynamodb"] }
+lance-index = { "version" = "=0.10.18" }
+lance-linalg = { "version" = "=0.10.18" }
+lance-testing = { "version" = "=0.10.18" }
 # Note that this one does not include pyarrow
 arrow = { version = "51.0", optional = false }
 arrow-array = "51.0"
--- a/ci/bump_version.sh
+++ b/ci/bump_version.sh
@@ -1,51 +0,0 @@
-set -e
-
-RELEASE_TYPE=${1:-"stable"}
-BUMP_MINOR=${2:-false}
-TAG_PREFIX=${3:-"v"} # Such as "python-v"
-HEAD_SHA=${4:-$(git rev-parse HEAD)}
-
-readonly SELF_DIR=$(cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )
-
-PREV_TAG=$(git tag --sort='version:refname' | grep ^$TAG_PREFIX | python $SELF_DIR/semver_sort.py $TAG_PREFIX | tail -n 1)
-echo "Found previous tag $PREV_TAG"
-
-# Initially, we don't want to tag if we are doing stable, because we will bump
-# again later. See comment at end for why.
-if [[ "$RELEASE_TYPE" == 'stable' ]]; then 
-  BUMP_ARGS="--no-tag"
-fi
-
-# If last is stable and not bumping minor
-if [[ $PREV_TAG != *beta* ]]; then
-    if [[ "$BUMP_MINOR" != "false" ]]; then
-      # X.Y.Z -> X.(Y+1).0-beta.0
-      bump-my-version bump -vv $BUMP_ARGS minor
-    else
-      # X.Y.Z -> X.Y.(Z+1)-beta.0
-      bump-my-version bump -vv $BUMP_ARGS patch
-    fi
-else
-  if [[ "$BUMP_MINOR" != "false" ]]; then
-    # X.Y.Z-beta.N -> X.(Y+1).0-beta.0
-    bump-my-version bump -vv $BUMP_ARGS minor
-  else
-    # X.Y.Z-beta.N -> X.Y.Z-beta.(N+1)
-    bump-my-version bump -vv $BUMP_ARGS pre_n
-  fi
-fi
-
-# The above bump will always bump to a pre-release version. If we are releasing
-# a stable version, bump the pre-release level ("pre_l") to make it stable.
-if [[ $RELEASE_TYPE == 'stable' ]]; then
-  # X.Y.Z-beta.N -> X.Y.Z
-  bump-my-version bump -vv pre_l
-fi
-
-# Validate that we have incremented version appropriately for breaking changes
-NEW_TAG=$(git describe --tags --exact-match HEAD)
-NEW_VERSION=$(echo $NEW_TAG | sed "s/^$TAG_PREFIX//")
-LAST_STABLE_RELEASE=$(git tag --sort='version:refname' | grep ^$TAG_PREFIX | grep -v beta | grep -vF "$NEW_TAG" | python $SELF_DIR/semver_sort.py $TAG_PREFIX | tail -n 1)
-LAST_STABLE_VERSION=$(echo $LAST_STABLE_RELEASE | sed "s/^$TAG_PREFIX//")
-
-python $SELF_DIR/check_breaking_changes.py $LAST_STABLE_RELEASE $HEAD_SHA $LAST_STABLE_VERSION $NEW_VERSION
--- a/ci/check_breaking_changes.py
+++ b/ci/check_breaking_changes.py
@@ -1,35 +0,0 @@
-"""
-Check whether there are any breaking changes in the PRs between the base and head commits.
-If there are, assert that we have incremented the minor version.
-"""
-import argparse
-import os
-from packaging.version import parse
-
-from github import Github
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser()
-    parser.add_argument("base")
-    parser.add_argument("head")
-    parser.add_argument("last_stable_version")
-    parser.add_argument("current_version")
-    args = parser.parse_args()
-
-    repo = Github(os.environ["GITHUB_TOKEN"]).get_repo(os.environ["GITHUB_REPOSITORY"])
-    commits = repo.compare(args.base, args.head).commits
-    prs = (pr for commit in commits for pr in commit.get_pulls())
-
-    for pr in prs:
-        if any(label.name == "breaking-change" for label in pr.labels):
-            print(f"Breaking change in PR: {pr.html_url}")
-            break
-    else:
-        print("No breaking changes found.")
-        exit(0)
-    
-    last_stable_version = parse(args.last_stable_version)
-    current_version = parse(args.current_version)
-    if current_version.minor <= last_stable_version.minor:
-        print("Minor version is not greater than the last stable version.")
-        exit(1)
--- a/ci/semver_sort.py
+++ b/ci/semver_sort.py
@@ -1,35 +0,0 @@
-"""
-Reads tag names from stdin (one per line) and prints them in ascending version order.
-"""
-
-import sys
-from packaging.version import parse, InvalidVersion
-
-if __name__ == "__main__":
-    import argparse
-    parser = argparse.ArgumentParser()
-    parser.add_argument("prefix", nargs="?", default="v")  # nargs="?" lets the default apply when omitted
-    args = parser.parse_args()
-
-    # Read the input from stdin
-    lines = sys.stdin.readlines()
-
-    # Parse the versions, keeping the original tag text alongside each parsed version
-    versions = []
-    for line in lines:
-        line = line.strip()
-        try:
-            version_str = line.removeprefix(args.prefix)
-            version = parse(version_str)
-        except InvalidVersion:
-            # There are old tags that don't follow the semver format; report and skip them
-            print(f"Invalid version: {line}", file=sys.stderr)
-            continue
-        versions.append((line, version))
-
-    # Sort by the parsed version, not by the raw string
-    versions.sort(key=lambda x: x[1])
-
-    # Print the sorted versions as original strings
-    for line, _ in versions:
-        print(line)
--- a/docs/src/basic.md
+++ b/docs/src/basic.md
@@ -44,36 +44,6 @@

    !!! info "Please also make sure you're using the same version of Arrow as in the [lancedb crate](https://github.com/lancedb/lancedb/blob/main/Cargo.toml)"

-### Preview releases
-
-Stable releases are created about every 2 weeks. For the latest features and bug
-fixes, you can install the preview release. These releases receive the same
-level of testing as stable releases, but are not guaranteed to be available for
-more than 6 months after they are released. Once your application is stable, we
-recommend switching to stable releases.
-
-=== "Python"
-
-      ```shell
-      pip install --pre --extra-index-url https://pypi.fury.io/lancedb/ lancedb
-      ```
-
-=== "Typescript"
-
-      ```shell
-      npm install vectordb@preview
-      ```
-
-=== "Rust"
-    
-    We don't push preview releases to crates.io, but you can reference the tag
-    in GitHub within your Cargo dependencies:
-
-    ```toml
-    [dependencies]
-    lancedb = { git = "https://github.com/lancedb/lancedb.git", tag = "vX.Y.Z-beta.N" }
-    ```
-
 ## Connect to a database

 === "Python"
--- a/java/core/lancedb-jni/Cargo.toml
+++ b/java/core/lancedb-jni/Cargo.toml
@@ -1,27 +0,0 @@
-[package]
-name = "lancedb-jni"
-# TODO modify lancedb/Cargo.toml for version and dependencies
-version = "0.4.18"
-categories.workspace = true
-edition.workspace = true
-keywords.workspace = true
-license.workspace = true
-publish = false
-readme.workspace = true
-repository.workspace = true
-description = "JNI bindings for LanceDB"
-
-[lib]
-crate-type = ["cdylib"]
-
-[dependencies]
-arrow = { workspace = true, features = ["ffi"] }
-arrow-schema.workspace = true
-jni = "0.21.1"
-lance.workspace = true
-lancedb = { path = "../../../rust/lancedb" }
-lazy_static.workspace = true
-serde = "1"
-serde_json = "1"
-snafu.workspace = true
-tokio = "1.23"
--- a/java/core/lancedb-jni/src/connection.rs
+++ b/java/core/lancedb-jni/src/connection.rs
@@ -1,130 +0,0 @@
-use crate::ffi::JNIEnvExt;
-use crate::traits::IntoJava;
-use crate::{Error, RT};
-use jni::objects::{JObject, JString, JValue};
-use jni::JNIEnv;
-pub const NATIVE_CONNECTION: &str = "nativeConnectionHandle";
-use crate::Result;
-use lancedb::connection::{connect, Connection};
-
-#[derive(Clone)]
-pub struct BlockingConnection {
-    pub(crate) inner: Connection,
-}
-
-impl BlockingConnection {
-    /// Connects to `dataset_uri`, blocking the calling thread on `RT` until the
-    /// connection future completes.
-    pub fn create(dataset_uri: &str) -> Result<Self> {
-        let inner = RT.block_on(connect(dataset_uri).execute())?;
-        Ok(Self { inner })
-    }
-
-    /// Lists table names, blocking on `RT` until the request completes.
-    ///
-    /// * `start_after` - forwarded to the builder's `start_after` when present.
-    /// * `limit` - forwarded to the builder's `limit` when present.
-    ///
-    /// Returns `Error::InvalidArgument` when `limit` is negative. A plain
-    /// `as u32` cast would silently turn such a value into a huge limit.
-    pub fn table_names(
-        &self,
-        start_after: Option<String>,
-        limit: Option<i32>,
-    ) -> Result<Vec<String>> {
-        let mut op = self.inner.table_names();
-        if let Some(start_after) = start_after {
-            op = op.start_after(start_after);
-        }
-        if let Some(limit) = limit {
-            let limit = u32::try_from(limit).map_err(|_| Error::InvalidArgument {
-                message: format!("limit must be non-negative, got {}", limit),
-                location: snafu::location!(),
-            })?;
-            op = op.limit(limit);
-        }
-        Ok(RT.block_on(op.execute())?)
-    }
-}
-
-impl IntoJava for BlockingConnection {
-    fn into_java<'a>(self, env: &mut JNIEnv<'a>) -> JObject<'a> {
-        attach_native_connection(env, self)
-    }
-}
-
-fn attach_native_connection<'local>(
-    env: &mut JNIEnv<'local>,
-    connection: BlockingConnection,
-) -> JObject<'local> {
-    let j_connection = create_java_connection_object(env);
-    // Park the Rust `BlockingConnection` in the Java object's `NATIVE_CONNECTION` field.
-    // Caution: Java's garbage collector does not free this Rust value, so it leaks
-    // unless it is explicitly taken back out of the field.
-    //
-    // To avoid the leak:
-    // 1. The Java `Connection` class should implement `java.io.Closeable`.
-    // 2. Callers should use try-with-resources (or call `close()` manually) so the
-    //    native handle is released.
-    let attached = unsafe { env.set_rust_field(&j_connection, NATIVE_CONNECTION, connection) };
-    if let Err(err) = attached {
-        env.throw_new(
-            "java/lang/RuntimeException",
-            format!("Failed to set native handle for Connection: {}", err),
-        )
-        .expect("Error throwing exception");
-        return JObject::null();
-    }
-    j_connection
-}
-
-fn create_java_connection_object<'a>(env: &mut JNIEnv<'a>) -> JObject<'a> {
-    env.new_object("com/lancedb/lancedb/Connection", "()V", &[])
-        .expect("Failed to create Java Lance Connection instance")
-}
-
-/// Takes the Rust `BlockingConnection` back out of the Java object's
-/// `NATIVE_CONNECTION` field and drops it.
-///
-/// If the field cannot be taken, a Java exception is thrown instead of
-/// panicking inside this `extern "system"` function.
-#[no_mangle]
-pub extern "system" fn Java_com_lancedb_lancedb_Connection_releaseNativeConnection(
-    mut env: JNIEnv,
-    j_connection: JObject,
-) {
-    let taken: jni::errors::Result<BlockingConnection> =
-        unsafe { env.take_rust_field(j_connection, NATIVE_CONNECTION) };
-    // On success the connection is dropped when this binding goes out of scope.
-    let _connection = ok_or_throw_without_return!(env, taken);
-}
-
-/// JNI entry point for `Connection.connect(String)`: reads the URI string, opens
-/// a `BlockingConnection`, and wraps it in a Java object. On failure a Java
-/// exception is thrown and a null object is returned (via `ok_or_throw!`).
-#[no_mangle]
-pub extern "system" fn Java_com_lancedb_lancedb_Connection_connect<'local>(
-    mut env: JNIEnv<'local>,
-    _obj: JObject,
-    dataset_uri_object: JString,
-) -> JObject<'local> {
-    let java_uri = ok_or_throw!(env, env.get_string(&dataset_uri_object));
-    let dataset_uri = String::from(java_uri);
-    let connection = ok_or_throw!(env, BlockingConnection::create(&dataset_uri));
-    connection.into_java(&mut env)
-}
-
-/// JNI entry point for listing table names; delegates to `inner_table_names`
-/// and converts an error into a thrown Java exception plus a null return.
-#[no_mangle]
-pub extern "system" fn Java_com_lancedb_lancedb_Connection_tableNames<'local>(
-    mut env: JNIEnv<'local>,
-    j_connection: JObject,
-    start_after_obj: JObject, // Optional<String>
-    limit_obj: JObject,       // Optional<Integer>
-) -> JObject<'local> {
-    let names = inner_table_names(&mut env, j_connection, start_after_obj, limit_obj);
-    ok_or_throw!(env, names)
-}
-
-/// Fallible body of the `tableNames` entry point: decodes the two optional
-/// arguments, queries the native connection, and copies the resulting names
-/// into a new `java.util.ArrayList`.
-fn inner_table_names<'local>(
-    env: &mut JNIEnv<'local>,
-    j_connection: JObject,
-    start_after_obj: JObject, // Optional<String>
-    limit_obj: JObject,       // Optional<Integer>
-) -> Result<JObject<'local>> {
-    let start_after = env.get_string_opt(&start_after_obj)?;
-    let limit = env.get_int_opt(&limit_obj)?;
-    let table_names = {
-        // The handle returned by `get_rust_field` lives only for this scope, so it
-        // is released before `env` is used again below.
-        let conn = unsafe {
-            env.get_rust_field::<_, _, BlockingConnection>(j_connection, NATIVE_CONNECTION)
-        }?;
-        let names = conn.table_names(start_after, limit)?;
-        names
-    };
-    let j_names = env.new_object("java/util/ArrayList", "()V", &[])?;
-    for name in table_names {
-        let j_name = JObject::from(env.new_string(name)?);
-        env.call_method(
-            &j_names,
-            "add",
-            "(Ljava/lang/Object;)Z",
-            &[JValue::Object(&j_name)],
-        )?;
-    }
-    Ok(j_names)
-}
--- a/java/core/lancedb-jni/src/error.rs
+++ b/java/core/lancedb-jni/src/error.rs
@@ -1,225 +0,0 @@
-// Copyright 2024 Lance Developers.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-use std::str::Utf8Error;
-
-use arrow_schema::ArrowError;
-use jni::errors::Error as JniError;
-use serde_json::Error as JsonError;
-use snafu::{Location, Snafu};
-
-type BoxedError = Box<dyn std::error::Error + Send + Sync + 'static>;
-
-/// Java Exception types
-pub enum JavaException {
-    IllegalArgumentException,
-    IOException,
-    RuntimeException,
-}
-
-impl JavaException {
-    /// Returns the slash-separated Java class name for this exception.
-    ///
-    /// Every arm yields a string literal, so the result is `'static` and does
-    /// not borrow from `self`.
-    pub fn as_str(&self) -> &'static str {
-        match self {
-            Self::IllegalArgumentException => "java/lang/IllegalArgumentException",
-            Self::IOException => "java/io/IOException",
-            Self::RuntimeException => "java/lang/RuntimeException",
-        }
-    }
-}
-/// TODO(lu) change to lancedb-jni
-#[derive(Debug, Snafu)]
-#[snafu(visibility(pub))]
-pub enum Error {
-    #[snafu(display("JNI error: {message}, {location}"))]
-    Jni { message: String, location: Location },
-    #[snafu(display("Invalid argument: {message}, {location}"))]
-    InvalidArgument { message: String, location: Location },
-    #[snafu(display("IO error: {source}, {location}"))]
-    IO {
-        source: BoxedError,
-        location: Location,
-    },
-    #[snafu(display("Arrow error: {message}, {location}"))]
-    Arrow { message: String, location: Location },
-    #[snafu(display("Index error: {message}, {location}"))]
-    Index { message: String, location: Location },
-    #[snafu(display("JSON error: {message}, {location}"))]
-    JSON { message: String, location: Location },
-    #[snafu(display("Dataset at path {path} was not found, {location}"))]
-    DatasetNotFound { path: String, location: Location },
-    #[snafu(display("Dataset already exists: {uri}, {location}"))]
-    DatasetAlreadyExists { uri: String, location: Location },
-    #[snafu(display("Table '{name}' already exists"))]
-    TableAlreadyExists { name: String },
-    #[snafu(display("Table '{name}' was not found"))]
-    TableNotFound { name: String },
-    #[snafu(display("Invalid table name '{name}': {reason}"))]
-    InvalidTableName { name: String, reason: String },
-    #[snafu(display("Embedding function '{name}' was not found: {reason}, {location}"))]
-    EmbeddingFunctionNotFound {
-        name: String,
-        reason: String,
-        location: Location,
-    },
-    #[snafu(display("Other Lance error: {message}, {location}"))]
-    OtherLance { message: String, location: Location },
-    #[snafu(display("Other LanceDB error: {message}, {location}"))]
-    OtherLanceDB { message: String, location: Location },
-}
-
-impl Error {
-    /// Throws this error as the Java exception class chosen for its variant.
-    pub fn throw(&self, env: &mut jni::JNIEnv) {
-        let exception = match self {
-            Self::InvalidArgument { .. }
-            | Self::DatasetNotFound { .. }
-            | Self::DatasetAlreadyExists { .. }
-            | Self::TableAlreadyExists { .. }
-            | Self::TableNotFound { .. }
-            | Self::InvalidTableName { .. }
-            | Self::EmbeddingFunctionNotFound { .. } => JavaException::IllegalArgumentException,
-            Self::IO { .. } | Self::Index { .. } => JavaException::IOException,
-            Self::Arrow { .. }
-            | Self::JSON { .. }
-            | Self::OtherLance { .. }
-            | Self::OtherLanceDB { .. }
-            | Self::Jni { .. } => JavaException::RuntimeException,
-        };
-        self.throw_as(env, exception)
-    }
-
-    /// Throws this error as the given Java exception class, using the error's
-    /// `Display` text as the exception message. Panics if the throw itself fails.
-    pub fn throw_as(&self, env: &mut jni::JNIEnv, exception: JavaException) {
-        let class_name = exception.as_str();
-        let panic_message = format!(
-            "Error when throwing Java exception: {}:{}",
-            class_name, self
-        );
-        env.throw_new(class_name, self.to_string())
-            .expect(&panic_message);
-    }
-}
-
-pub type Result<T> = std::result::Result<T, Error>;
-
-trait ToSnafuLocation {
-    fn to_snafu_location(&'static self) -> snafu::Location;
-}
-
-impl ToSnafuLocation for std::panic::Location<'static> {
-    fn to_snafu_location(&'static self) -> snafu::Location {
-        snafu::Location::new(self.file(), self.line(), self.column())
-    }
-}
-
-/// JNI failures become `Error::Jni`, tagged with the caller's source location.
-impl From<JniError> for Error {
-    #[track_caller]
-    fn from(source: JniError) -> Self {
-        let location = std::panic::Location::caller().to_snafu_location();
-        let message = source.to_string();
-        Self::Jni { message, location }
-    }
-}
-
-/// UTF-8 decoding failures become `Error::InvalidArgument`.
-impl From<Utf8Error> for Error {
-    #[track_caller]
-    fn from(source: Utf8Error) -> Self {
-        let location = std::panic::Location::caller().to_snafu_location();
-        let message = source.to_string();
-        Self::InvalidArgument { message, location }
-    }
-}
-
-/// Arrow failures become `Error::Arrow`.
-impl From<ArrowError> for Error {
-    #[track_caller]
-    fn from(source: ArrowError) -> Self {
-        let location = std::panic::Location::caller().to_snafu_location();
-        let message = source.to_string();
-        Self::Arrow { message, location }
-    }
-}
-
-/// JSON failures become `Error::JSON`.
-impl From<JsonError> for Error {
-    #[track_caller]
-    fn from(source: JsonError) -> Self {
-        let location = std::panic::Location::caller().to_snafu_location();
-        let message = source.to_string();
-        Self::JSON { message, location }
-    }
-}
-
-/// Maps `lance::Error` variants onto the matching JNI-layer variants, keeping the
-/// location carried by the source error where one exists.
-impl From<lance::Error> for Error {
-    #[track_caller]
-    fn from(source: lance::Error) -> Self {
-        // Only the catch-all arm needs the caller's location; it is resolved up
-        // front so the match arms stay short.
-        let caller_location = std::panic::Location::caller().to_snafu_location();
-        match source {
-            lance::Error::DatasetNotFound {
-                path,
-                source: _,
-                location,
-            } => Self::DatasetNotFound { path, location },
-            lance::Error::DatasetAlreadyExists { uri, location } => {
-                Self::DatasetAlreadyExists { uri, location }
-            }
-            lance::Error::IO { source, location } => Self::IO { source, location },
-            lance::Error::Arrow { message, location } => Self::Arrow { message, location },
-            lance::Error::Index { message, location } => Self::Index { message, location },
-            lance::Error::InvalidInput { source, location } => {
-                let message = source.to_string();
-                Self::InvalidArgument { message, location }
-            }
-            _ => Self::OtherLance {
-                message: source.to_string(),
-                location: caller_location,
-            },
-        }
-    }
-}
-
-/// Maps `lancedb::Error` variants onto the JNI-layer variants. Variants that
-/// need a location get the location of whoever called `from`.
-impl From<lancedb::Error> for Error {
-    #[track_caller]
-    fn from(source: lancedb::Error) -> Self {
-        // Resolved once; each arm that needs a location uses this value.
-        let caller_location = std::panic::Location::caller().to_snafu_location();
-        match source {
-            lancedb::Error::InvalidTableName { name, reason } => {
-                Self::InvalidTableName { name, reason }
-            }
-            lancedb::Error::InvalidInput { message } => Self::InvalidArgument {
-                message,
-                location: caller_location,
-            },
-            lancedb::Error::TableNotFound { name } => Self::TableNotFound { name },
-            lancedb::Error::TableAlreadyExists { name } => Self::TableAlreadyExists { name },
-            lancedb::Error::EmbeddingFunctionNotFound { name, reason } => {
-                Self::EmbeddingFunctionNotFound {
-                    name,
-                    reason,
-                    location: caller_location,
-                }
-            }
-            lancedb::Error::Arrow { source } => Self::Arrow {
-                message: source.to_string(),
-                location: caller_location,
-            },
-            // Nested Lance errors go through the `From<lance::Error>` conversion.
-            lancedb::Error::Lance { source } => Self::from(source),
-            _ => Self::OtherLanceDB {
-                message: source.to_string(),
-                location: caller_location,
-            },
-        }
-    }
-}
--- a/java/core/lancedb-jni/src/ffi.rs
+++ b/java/core/lancedb-jni/src/ffi.rs
@@ -1,204 +0,0 @@
-// Copyright 2024 Lance Developers.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-use core::slice;
-
-use jni::objects::{JByteBuffer, JObjectArray, JString};
-use jni::sys::jobjectArray;
-use jni::{objects::JObject, JNIEnv};
-
-use crate::error::{Error, Result};
-
-/// TODO(lu) import from lance-jni without duplicate
-/// Extend JNIEnv with helper functions.
-pub trait JNIEnvExt {
-    /// Get integers from Java List<Integer> object.
-    fn get_integers(&mut self, obj: &JObject) -> Result<Vec<i32>>;
-
-    /// Get strings from Java List<String> object.
-    fn get_strings(&mut self, obj: &JObject) -> Result<Vec<String>>;
-
-    /// Get strings from Java String[] object.
-    /// Note that get Option<Vec<String>> from Java Optional<String[]> just doesn't work.
-    #[allow(unused)]
-    fn get_strings_array(&mut self, obj: jobjectArray) -> Result<Vec<String>>;
-
-    /// Get Option<String> from Java Optional<String>.
-    fn get_string_opt(&mut self, obj: &JObject) -> Result<Option<String>>;
-
-    /// Get Option<Vec<String>> from Java Optional<List<String>>.
-    #[allow(unused)]
-    fn get_strings_opt(&mut self, obj: &JObject) -> Result<Option<Vec<String>>>;
-
-    /// Get Option<i32> from Java Optional<Integer>.
-    fn get_int_opt(&mut self, obj: &JObject) -> Result<Option<i32>>;
-
-    /// Get Option<Vec<i32>> from Java Optional<List<Integer>>.
-    fn get_ints_opt(&mut self, obj: &JObject) -> Result<Option<Vec<i32>>>;
-
-    /// Get Option<i64> from Java Optional<Long>.
-    #[allow(unused)]
-    fn get_long_opt(&mut self, obj: &JObject) -> Result<Option<i64>>;
-
-    /// Get Option<u64> from Java Optional<Long>.
-    #[allow(unused)]
-    fn get_u64_opt(&mut self, obj: &JObject) -> Result<Option<u64>>;
-
-    /// Get Option<&[u8]> from Java Optional<ByteBuffer>.
-    #[allow(unused)]
-    fn get_bytes_opt(&mut self, obj: &JObject) -> Result<Option<&[u8]>>;
-
-    fn get_optional<T, F>(&mut self, obj: &JObject, f: F) -> Result<Option<T>>
-    where
-        F: FnOnce(&mut JNIEnv, &JObject) -> Result<T>;
-}
-
-impl JNIEnvExt for JNIEnv<'_> {
-    /// Collects a Java `List<Integer>` by calling `intValue()` on each element.
-    fn get_integers(&mut self, obj: &JObject) -> Result<Vec<i32>> {
-        let list = self.get_list(obj)?;
-        let mut iter = list.iter(self)?;
-        let mut results = Vec::with_capacity(list.size(self)? as usize);
-        while let Some(elem) = iter.next(self)? {
-            let int_obj = self.call_method(elem, "intValue", "()I", &[])?;
-            let int_value = int_obj.i()?;
-            results.push(int_value);
-        }
-        Ok(results)
-    }
-
-    /// Collects a Java `List<String>` into owned Rust strings.
-    fn get_strings(&mut self, obj: &JObject) -> Result<Vec<String>> {
-        let list = self.get_list(obj)?;
-        let mut iter = list.iter(self)?;
-        let mut results = Vec::with_capacity(list.size(self)? as usize);
-        while let Some(elem) = iter.next(self)? {
-            let jstr = JString::from(elem);
-            let val = self.get_string(&jstr)?;
-            results.push(val.to_str()?.to_string())
-        }
-        Ok(results)
-    }
-
-    /// Collects a Java `String[]` into owned Rust strings.
-    fn get_strings_array(&mut self, obj: jobjectArray) -> Result<Vec<String>> {
-        let jobject_array = unsafe { JObjectArray::from_raw(obj) };
-        let array_len = self.get_array_length(&jobject_array)?;
-        // The length is known up front, so reserve once instead of growing per push.
-        let mut res: Vec<String> = Vec::with_capacity(array_len as usize);
-        for i in 0..array_len {
-            let item: JString = self.get_object_array_element(&jobject_array, i)?.into();
-            res.push(self.get_string(&item)?.into());
-        }
-        Ok(res)
-    }
-
-    /// Unwraps a Java `Optional<String>`.
-    fn get_string_opt(&mut self, obj: &JObject) -> Result<Option<String>> {
-        self.get_optional(obj, |env, inner_obj| {
-            let java_obj_gen = env.call_method(inner_obj, "get", "()Ljava/lang/Object;", &[])?;
-            let java_string_obj = java_obj_gen.l()?;
-            let jstr = JString::from(java_string_obj);
-            let val = env.get_string(&jstr)?;
-            Ok(val.to_str()?.to_string())
-        })
-    }
-
-    /// Unwraps a Java `Optional<List<String>>`.
-    fn get_strings_opt(&mut self, obj: &JObject) -> Result<Option<Vec<String>>> {
-        self.get_optional(obj, |env, inner_obj| {
-            let java_obj_gen = env.call_method(inner_obj, "get", "()Ljava/lang/Object;", &[])?;
-            let java_list_obj = java_obj_gen.l()?;
-            env.get_strings(&java_list_obj)
-        })
-    }
-
-    /// Unwraps a Java `Optional<Integer>` via `intValue()`.
-    fn get_int_opt(&mut self, obj: &JObject) -> Result<Option<i32>> {
-        self.get_optional(obj, |env, inner_obj| {
-            let java_obj_gen = env.call_method(inner_obj, "get", "()Ljava/lang/Object;", &[])?;
-            let java_int_obj = java_obj_gen.l()?;
-            let int_obj = env.call_method(java_int_obj, "intValue", "()I", &[])?;
-            let int_value = int_obj.i()?;
-            Ok(int_value)
-        })
-    }
-
-    /// Unwraps a Java `Optional<List<Integer>>`.
-    fn get_ints_opt(&mut self, obj: &JObject) -> Result<Option<Vec<i32>>> {
-        self.get_optional(obj, |env, inner_obj| {
-            let java_obj_gen = env.call_method(inner_obj, "get", "()Ljava/lang/Object;", &[])?;
-            let java_list_obj = java_obj_gen.l()?;
-            env.get_integers(&java_list_obj)
-        })
-    }
-
-    /// Unwraps a Java `Optional<Long>` via `longValue()`.
-    fn get_long_opt(&mut self, obj: &JObject) -> Result<Option<i64>> {
-        self.get_optional(obj, |env, inner_obj| {
-            let java_obj_gen = env.call_method(inner_obj, "get", "()Ljava/lang/Object;", &[])?;
-            let java_long_obj = java_obj_gen.l()?;
-            let long_obj = env.call_method(java_long_obj, "longValue", "()J", &[])?;
-            let long_value = long_obj.j()?;
-            Ok(long_value)
-        })
-    }
-
-    /// Unwraps a Java `Optional<Long>` and reinterprets the value as `u64`.
-    /// The `as` cast keeps the bit pattern, so a negative `long` maps to a large `u64`.
-    fn get_u64_opt(&mut self, obj: &JObject) -> Result<Option<u64>> {
-        self.get_optional(obj, |env, inner_obj| {
-            let java_obj_gen = env.call_method(inner_obj, "get", "()Ljava/lang/Object;", &[])?;
-            let java_long_obj = java_obj_gen.l()?;
-            let long_obj = env.call_method(java_long_obj, "longValue", "()J", &[])?;
-            let long_value = long_obj.j()?;
-            Ok(long_value as u64)
-        })
-    }
-
-    /// Unwraps a Java `Optional<ByteBuffer>` into a byte slice over the buffer's
-    /// direct memory.
-    ///
-    /// NOTE(review): the slice is built from a raw pointer, so nothing here ties
-    /// its validity to the Java buffer staying alive — confirm callers keep the
-    /// buffer reachable while the slice is in use.
-    fn get_bytes_opt(&mut self, obj: &JObject) -> Result<Option<&[u8]>> {
-        self.get_optional(obj, |env, inner_obj| {
-            let java_obj_gen = env.call_method(inner_obj, "get", "()Ljava/lang/Object;", &[])?;
-            let java_byte_buffer_obj = java_obj_gen.l()?;
-            let j_byte_buffer = JByteBuffer::from(java_byte_buffer_obj);
-            let raw_data = env.get_direct_buffer_address(&j_byte_buffer)?;
-            let capacity = env.get_direct_buffer_capacity(&j_byte_buffer)?;
-            let data = unsafe { slice::from_raw_parts(raw_data, capacity) };
-            Ok(data)
-        })
-    }
-
-    /// Shared driver for the `*_opt` helpers.
-    ///
-    /// Returns `Ok(None)` when `obj` is a null reference or an empty `Optional`;
-    /// otherwise calls `f` with the `Optional` object itself (so `f` must call
-    /// `get()`) and wraps its result in `Some`.
-    fn get_optional<T, F>(&mut self, obj: &JObject, f: F) -> Result<Option<T>>
-    where
-        F: FnOnce(&mut JNIEnv, &JObject) -> Result<T>,
-    {
-        if obj.is_null() {
-            return Ok(None);
-        }
-        // `Optional.isPresent()` exists since Java 8, whereas `isEmpty()` was only
-        // added in Java 11; the two are exact negations of each other.
-        let is_present = self.call_method(obj, "isPresent", "()Z", &[])?;
-        if is_present.z()? {
-            f(self, obj).map(Some)
-        } else {
-            // TODO(lu): put get java object into here cuz can only get java Object
-            Ok(None)
-        }
-    }
-}
-
-#[no_mangle]
-pub extern "system" fn Java_com_lancedb_lance_test_JniTestHelper_parseInts(
-    mut env: JNIEnv,
-    _obj: JObject,
-    list_obj: JObject, // List<Integer>
-) {
-    ok_or_throw_without_return!(env, env.get_integers(&list_obj));
-}
-
-#[no_mangle]
-pub extern "system" fn Java_com_lancedb_lance_test_JniTestHelper_parseIntsOpt(
-    mut env: JNIEnv,
-    _obj: JObject,
-    list_obj: JObject, // Optional<List<Integer>>
-) {
-    ok_or_throw_without_return!(env, env.get_ints_opt(&list_obj));
-}
--- a/java/core/lancedb-jni/src/lib.rs
+++ b/java/core/lancedb-jni/src/lib.rs
@@ -1,68 +0,0 @@
-// Copyright 2024 Lance Developers.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-use lazy_static::lazy_static;
-
-// TODO import from lance-jni without duplicate
-/// Unwraps `$result`; on `Err`, throws the error as a Java exception through
-/// `$env` and returns a null `JObject` from the enclosing function.
-///
-/// The error type and `JObject` are path-qualified so call sites do not need
-/// `Error` or `JObject` in scope.
-#[macro_export]
-macro_rules! ok_or_throw {
-    ($env:expr, $result:expr) => {
-        match $result {
-            Ok(value) => value,
-            Err(err) => {
-                $crate::Error::from(err).throw(&mut $env);
-                return ::jni::objects::JObject::null();
-            }
-        }
-    };
-}
-
-/// Unwraps `$result`; on `Err`, throws the error as a Java exception through
-/// `$env` and returns `()` from the enclosing function.
-///
-/// The error type is reached through `$crate` so call sites do not need
-/// `Error` in scope.
-macro_rules! ok_or_throw_without_return {
-    ($env:expr, $result:expr) => {
-        match $result {
-            Ok(value) => value,
-            Err(err) => {
-                $crate::Error::from(err).throw(&mut $env);
-                return;
-            }
-        }
-    };
-}
-
-/// Unwraps `$result`; on `Err`, throws the error as a Java exception through
-/// `$env` and returns `$ret` from the enclosing function.
-///
-/// The error type is reached through `$crate` so call sites do not need
-/// `Error` in scope.
-#[macro_export]
-macro_rules! ok_or_throw_with_return {
-    ($env:expr, $result:expr, $ret:expr) => {
-        match $result {
-            Ok(value) => value,
-            Err(err) => {
-                $crate::Error::from(err).throw(&mut $env);
-                return $ret;
-            }
-        }
-    };
-}
-
-mod connection;
-pub mod error;
-mod ffi;
-mod traits;
-
-pub use error::{Error, Result};
-
-lazy_static! {
-    static ref RT: tokio::runtime::Runtime = tokio::runtime::Builder::new_multi_thread()
-        .enable_all()
-        .build()
-        .expect("Failed to create tokio runtime");
-}
--- a/java/core/lancedb-jni/src/traits.rs
+++ b/java/core/lancedb-jni/src/traits.rs
@@ -1,122 +0,0 @@
-// Copyright 2024 Lance Developers.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-use jni::objects::{JMap, JObject, JString, JValue};
-use jni::JNIEnv;
-
-use crate::Result;
-
-pub trait FromJObject<T> {
-    fn extract(&self) -> Result<T>;
-}
-
-/// Convert a Rust type into a Java Object.
-pub trait IntoJava {
-    fn into_java<'a>(self, env: &mut JNIEnv<'a>) -> JObject<'a>;
-}
-
-// NOTE(review): in each impl below `self` is a `JObject`, so `JValue::from(self)`
-// presumably yields an object-typed value, and the primitive accessors
-// (`i`/`j`/`f`/`d`) may reject it rather than unbox a boxed
-// `Integer`/`Long`/`Float`/`Double` — confirm against the jni crate.
-impl FromJObject<i32> for JObject<'_> {
-    fn extract(&self) -> Result<i32> {
-        Ok(JValue::from(self).i()?)
-    }
-}
-
-impl FromJObject<i64> for JObject<'_> {
-    fn extract(&self) -> Result<i64> {
-        Ok(JValue::from(self).j()?)
-    }
-}
-
-impl FromJObject<f32> for JObject<'_> {
-    fn extract(&self) -> Result<f32> {
-        Ok(JValue::from(self).f()?)
-    }
-}
-
-impl FromJObject<f64> for JObject<'_> {
-    fn extract(&self) -> Result<f64> {
-        Ok(JValue::from(self).d()?)
-    }
-}
-
-pub trait FromJString {
-    fn extract(&self, env: &mut JNIEnv) -> Result<String>;
-}
-
-impl FromJString for JString<'_> {
-    fn extract(&self, env: &mut JNIEnv) -> Result<String> {
-        Ok(env.get_string(self)?.into())
-    }
-}
-
-pub trait JMapExt {
-    #[allow(dead_code)]
-    fn get_string(&self, env: &mut JNIEnv, key: &str) -> Result<Option<String>>;
-
-    #[allow(dead_code)]
-    fn get_i32(&self, env: &mut JNIEnv, key: &str) -> Result<Option<i32>>;
-
-    #[allow(dead_code)]
-    fn get_i64(&self, env: &mut JNIEnv, key: &str) -> Result<Option<i64>>;
-
-    #[allow(dead_code)]
-    fn get_f32(&self, env: &mut JNIEnv, key: &str) -> Result<Option<f32>>;
-
-    #[allow(dead_code)]
-    fn get_f64(&self, env: &mut JNIEnv, key: &str) -> Result<Option<f64>>;
-}
-
-/// Looks up `key` in a Java map and converts the value with `FromJObject`.
-/// Returns `Ok(None)` when the lookup yields nothing or a null reference.
-fn get_map_value<T>(env: &mut JNIEnv, map: &JMap, key: &str) -> Result<Option<T>>
-where
-    for<'a> JObject<'a>: FromJObject<T>,
-{
-    let key_obj: JObject = env.new_string(key)?.into();
-    match map.get(env, &key_obj)? {
-        Some(value) if !value.is_null() => Ok(Some(value.extract()?)),
-        _ => Ok(None),
-    }
-}
-
-impl JMapExt for JMap<'_, '_, '_> {
-    fn get_string(&self, env: &mut JNIEnv, key: &str) -> Result<Option<String>> {
-        let key_obj: JObject = env.new_string(key)?.into();
-        if let Some(value) = self.get(env, &key_obj)? {
-            let value_str: JString = value.into();
-            Ok(Some(value_str.extract(env)?))
-        } else {
-            Ok(None)
-        }
-    }
-
-    fn get_i32(&self, env: &mut JNIEnv, key: &str) -> Result<Option<i32>> {
-        get_map_value(env, self, key)
-    }
-
-    fn get_i64(&self, env: &mut JNIEnv, key: &str) -> Result<Option<i64>> {
-        get_map_value(env, self, key)
-    }
-
-    fn get_f32(&self, env: &mut JNIEnv, key: &str) -> Result<Option<f32>> {
-        get_map_value(env, self, key)
-    }
-
-    fn get_f64(&self, env: &mut JNIEnv, key: &str) -> Result<Option<f64>> {
-        get_map_value(env, self, key)
-    }
-}
--- a/java/core/pom.xml
+++ b/java/core/pom.xml
@@ -1,94 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-
-<project xmlns="http://maven.apache.org/POM/4.0.0"
-    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
-    xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
-    <modelVersion>4.0.0</modelVersion>
-
-    <parent>
-        <groupId>com.lancedb</groupId>
-        <artifactId>lancedb-parent</artifactId>
-        <version>0.1-SNAPSHOT</version>
-        <relativePath>../pom.xml</relativePath>
-    </parent>
-
-    <artifactId>lancedb-core</artifactId>
-    <name>LanceDB Core</name>
-    <packaging>jar</packaging>
-
-    <dependencies>
-        <dependency>
-            <groupId>org.apache.arrow</groupId>
-            <artifactId>arrow-vector</artifactId>
-        </dependency>
-        <dependency>
-            <groupId>org.apache.arrow</groupId>
-            <artifactId>arrow-memory-netty</artifactId>
-        </dependency>
-        <dependency>
-            <groupId>org.apache.arrow</groupId>
-            <artifactId>arrow-c-data</artifactId>
-        </dependency>
-        <dependency>
-            <groupId>org.apache.arrow</groupId>
-            <artifactId>arrow-dataset</artifactId>
-        </dependency>
-        <dependency>
-            <groupId>org.json</groupId>
-            <artifactId>json</artifactId>
-        </dependency>
-        <dependency>
-            <groupId>org.questdb</groupId>
-            <artifactId>jar-jni</artifactId>
-        </dependency>
-        <dependency>
-            <groupId>org.junit.jupiter</groupId>
-            <artifactId>junit-jupiter</artifactId>
-             <scope>test</scope>
-        </dependency>
-    </dependencies>
-
-    <profiles>
-        <profile>
-            <id>build-jni</id>
-            <activation>
-                <activeByDefault>true</activeByDefault>
-            </activation>
-            <build>
-                <plugins>
-                    <plugin>
-                        <groupId>org.questdb</groupId>
-                        <artifactId>rust-maven-plugin</artifactId>
-                        <version>1.1.1</version>
-                        <executions>
-                            <execution>
-                                <id>lancedb-jni</id>
-                                <goals>
-                                    <goal>build</goal>
-                                </goals>
-                                <configuration>
-                                    <path>lancedb-jni</path>
-                                    <!--<release>true</release>-->
-                                    <!-- Copy native libraries to target/classes for runtime access -->
-                                    <copyTo>${project.build.directory}/classes/nativelib</copyTo>
-                                    <copyWithPlatformDir>true</copyWithPlatformDir>
-                                </configuration>
-                            </execution>
-                            <execution>
-                                <id>lancedb-jni-test</id>
-                                <goals>
-                                    <goal>test</goal>
-                                </goals>
-                                <configuration>
-                                    <path>lancedb-jni</path>
-                                    <release>false</release>
-                                    <verbosity>-v</verbosity>
-                                </configuration>
-                            </execution>
-                        </executions>
-                    </plugin>
-                </plugins>
-            </build>
-        </profile>
-    </profiles>
-</project>
--- a/java/core/src/main/java/com/lancedb/lancedb/Connection.java
+++ b/java/core/src/main/java/com/lancedb/lancedb/Connection.java
@@ -1,120 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.lancedb.lancedb;
-
-import io.questdb.jar.jni.JarJniLoader;
-import java.io.Closeable;
-import java.util.List;
-import java.util.Optional;
-
-/**
- * Represents LanceDB database.
- */
-public class Connection implements Closeable {
-  static {
-    JarJniLoader.loadLib(Connection.class, "/nativelib", "lancedb_jni");
-  }
-
-  private long nativeConnectionHandle;
-
-  /**
-   * Connect to a LanceDB instance.
-   */
-  public static native Connection connect(String uri);
-
-  /**
-   * Get the names of all tables in the database. The names are sorted in
-   * ascending order.
-   *
-   * @return the table names
-   */
-  public List<String> tableNames() {
-    return tableNames(Optional.empty(), Optional.empty());
-  }
-
-  /**
-   * Get the names of filtered tables in the database. The names are sorted in
-   * ascending order.
-   *
-   * @param limit The number of results to return.
-   * @return the table names
-   */
-  public List<String> tableNames(int limit) {
-    return tableNames(Optional.empty(), Optional.of(limit));
-  }
-
-  /**
-   * Get the names of filtered tables in the database. The names are sorted in
-   * ascending order.
-   *
-   * @param startAfter If present, only return names that come lexicographically after the supplied
-   *                   value. This can be combined with limit to implement pagination
-   *                   by setting this to the last table name from the previous page.
-   * @return the table names
-   */
-  public List<String> tableNames(String startAfter) {
-    return tableNames(Optional.of(startAfter), Optional.empty());
-  }
-
-  /**
-   * Get the names of filtered tables in the database. The names are sorted in
-   * ascending order.
-   *
-   * @param startAfter If present, only return names that come lexicographically after the supplied
-   *                   value. This can be combined with limit to implement pagination
-   *                   by setting this to the last table name from the previous page.
-   * @param limit The number of results to return.
-   * @return the table names
-   */
-  public List<String> tableNames(String startAfter, int limit) {
-    return tableNames(Optional.of(startAfter), Optional.of(limit));
-  }
-
-  /**
-   * Get the names of filtered tables in the database. The names are sorted in
-   * ascending order.
-   *
-   * @param startAfter If present, only return names that come lexicographically after the supplied
-   *                   value. This can be combined with limit to implement pagination
-   *                   by setting this to the last table name from the previous page.
-   * @param limit The number of results to return.
-   * @return the table names
-   */
-  public native List<String> tableNames(
-      Optional<String> startAfter, Optional<Integer> limit);
-
-  /**
-   * Closes this connection and releases any system resources associated with it. If
-   * the connection is
-   * already closed, then invoking this method has no effect.
-   */
-  @Override
-  public void close() {
-    if (nativeConnectionHandle != 0) {
-      releaseNativeConnection(nativeConnectionHandle);
-      nativeConnectionHandle = 0;
-    }
-  }
-
-  /**
-   * Native method to release the Lance connection resources associated with the
-   * given handle.
-   *
-   * @param handle The native handle to the connection resource.
-   */
-  private native void releaseNativeConnection(long handle);
-
-  private Connection() {}
-}
--- a/java/core/src/test/java/com/lancedb/lancedb/ConnectionTest.java
+++ b/java/core/src/test/java/com/lancedb/lancedb/ConnectionTest.java
@@ -1,135 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.lancedb.lancedb;
-
-import static org.junit.jupiter.api.Assertions.assertEquals;
-import static org.junit.jupiter.api.Assertions.assertTrue;
-
-import java.nio.file.Path;
-import java.util.List;
-import java.net.URL;
-import org.junit.jupiter.api.BeforeAll;
-import org.junit.jupiter.api.Test;
-import org.junit.jupiter.api.io.TempDir;
-
-public class ConnectionTest {
-  private static final String[] TABLE_NAMES = {
-      "dataset_version",
-      "new_empty_dataset",
-      "test",
-      "write_stream"
-  };
-
-  @TempDir
-  static Path tempDir; // Temporary directory for the tests
-  private static URL lanceDbURL;
-
-  @BeforeAll
-  static void setUp() {
-    ClassLoader classLoader = ConnectionTest.class.getClassLoader();
-    lanceDbURL = classLoader.getResource("example_db");
-  }
-
-  @Test
-  void emptyDB() {
-    String databaseUri = tempDir.resolve("emptyDB").toString();
-    try (Connection conn = Connection.connect(databaseUri)) {
-      List<String> tableNames = conn.tableNames();
-      assertTrue(tableNames.isEmpty());
-    }
-  }
-
-  @Test
-  void tableNames() {
-    try (Connection conn = Connection.connect(lanceDbURL.toString())) {
-      List<String> tableNames = conn.tableNames();
-      assertEquals(4, tableNames.size());
-      for (int i = 0; i < TABLE_NAMES.length; i++) {
-        assertEquals(TABLE_NAMES[i], tableNames.get(i));
-      }
-    }
-  }
-
-  @Test
-  void tableNamesStartAfter() {
-    try (Connection conn = Connection.connect(lanceDbURL.toString())) {
-      assertTableNamesStartAfter(conn, TABLE_NAMES[0], 3, TABLE_NAMES[1], TABLE_NAMES[2], TABLE_NAMES[3]);
-      assertTableNamesStartAfter(conn, TABLE_NAMES[1], 2, TABLE_NAMES[2], TABLE_NAMES[3]);
-      assertTableNamesStartAfter(conn, TABLE_NAMES[2], 1, TABLE_NAMES[3]);
-      assertTableNamesStartAfter(conn, TABLE_NAMES[3], 0);
-      assertTableNamesStartAfter(conn, "a_dataset", 4, TABLE_NAMES[0], TABLE_NAMES[1], TABLE_NAMES[2], TABLE_NAMES[3]);
-      assertTableNamesStartAfter(conn, "o_dataset", 2, TABLE_NAMES[2], TABLE_NAMES[3]);
-      assertTableNamesStartAfter(conn, "v_dataset", 1, TABLE_NAMES[3]);
-      assertTableNamesStartAfter(conn, "z_dataset", 0);
-    }
-  }
-
-  private void assertTableNamesStartAfter(Connection conn, String startAfter, int expectedSize, String... expectedNames) {
-    List<String> tableNames = conn.tableNames(startAfter);
-    assertEquals(expectedSize, tableNames.size());
-    for (int i = 0; i < expectedNames.length; i++) {
-      assertEquals(expectedNames[i], tableNames.get(i));
-    }
-  }
-
-  @Test
-  void tableNamesLimit() {
-      try (Connection conn = Connection.connect(lanceDbURL.toString())) {
-      for (int i = 0; i <= TABLE_NAMES.length; i++) {
-        List<String> tableNames = conn.tableNames(i);
-        assertEquals(i, tableNames.size());
-        for (int j = 0; j < i; j++) {
-          assertEquals(TABLE_NAMES[j], tableNames.get(j));
-        }
-      }
-    }
-  }
-
-  @Test
-  void tableNamesStartAfterLimit() {
-    try (Connection conn = Connection.connect(lanceDbURL.toString())) {
-      List<String> tableNames = conn.tableNames(TABLE_NAMES[0], 2);
-      assertEquals(2, tableNames.size());
-      assertEquals(TABLE_NAMES[1], tableNames.get(0));
-      assertEquals(TABLE_NAMES[2], tableNames.get(1));
-      tableNames = conn.tableNames(TABLE_NAMES[1], 1);
-      assertEquals(1, tableNames.size());
-      assertEquals(TABLE_NAMES[2], tableNames.get(0));
-      tableNames = conn.tableNames(TABLE_NAMES[2], 2);
-      assertEquals(1, tableNames.size());
-      assertEquals(TABLE_NAMES[3], tableNames.get(0));
-      tableNames = conn.tableNames(TABLE_NAMES[3], 2);
-      assertEquals(0, tableNames.size());
-      tableNames = conn.tableNames(TABLE_NAMES[0], 0);
-      assertEquals(0, tableNames.size());
-
-      // Limit larger than the number of remaining tables
-      tableNames = conn.tableNames(TABLE_NAMES[0], 10);
-      assertEquals(3, tableNames.size());
-      assertEquals(TABLE_NAMES[1], tableNames.get(0));
-      assertEquals(TABLE_NAMES[2], tableNames.get(1));
-      assertEquals(TABLE_NAMES[3], tableNames.get(2));
-
-      // Start after a value not in the list
-      tableNames = conn.tableNames("non_existent_table", 2);
-      assertEquals(2, tableNames.size());
-      assertEquals(TABLE_NAMES[2], tableNames.get(0));
-      assertEquals(TABLE_NAMES[3], tableNames.get(1));
-
-      // Start after the last table with a limit
-      tableNames = conn.tableNames(TABLE_NAMES[3], 1);
-      assertEquals(0, tableNames.size());
-    }
-  }
-}
--- a/java/core/src/test/resources/example_db/dataset_version.lance/_latest.manifest
+++ b/java/core/src/test/resources/example_db/dataset_version.lance/_latest.manifest
--- a/java/core/src/test/resources/example_db/dataset_version.lance/_transactions/0-d51afd07-e3cd-4c76-9b9b-787e13fd55b0.txn
+++ b/java/core/src/test/resources/example_db/dataset_version.lance/_transactions/0-d51afd07-e3cd-4c76-9b9b-787e13fd55b0.txn
@@ -1 +0,0 @@
-$d51afd07-e3cd-4c76-9b9b-787e13fd55b0<62>=id <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>*int3208name <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>*string08
--- a/java/core/src/test/resources/example_db/dataset_version.lance/_transactions/1-336c3e56-33fd-45d8-bbfb-95ebb563cbe0.txn
+++ b/java/core/src/test/resources/example_db/dataset_version.lance/_transactions/1-336c3e56-33fd-45d8-bbfb-95ebb563cbe0.txn
--- a/java/core/src/test/resources/example_db/dataset_version.lance/_transactions/2-3344b369-7471-4e23-8865-c949b6e19bc2.txn
+++ b/java/core/src/test/resources/example_db/dataset_version.lance/_transactions/2-3344b369-7471-4e23-8865-c949b6e19bc2.txn
--- a/java/core/src/test/resources/example_db/dataset_version.lance/_versions/1.manifest
+++ b/java/core/src/test/resources/example_db/dataset_version.lance/_versions/1.manifest
--- a/java/core/src/test/resources/example_db/dataset_version.lance/_versions/2.manifest
+++ b/java/core/src/test/resources/example_db/dataset_version.lance/_versions/2.manifest
--- a/java/core/src/test/resources/example_db/dataset_version.lance/_versions/3.manifest
+++ b/java/core/src/test/resources/example_db/dataset_version.lance/_versions/3.manifest
--- a/java/core/src/test/resources/example_db/dataset_version.lance/data/60a9b599-f79f-48a8-bffa-b495762b622a.lance
+++ b/java/core/src/test/resources/example_db/dataset_version.lance/data/60a9b599-f79f-48a8-bffa-b495762b622a.lance
--- a/java/core/src/test/resources/example_db/dataset_version.lance/data/a13f68ba-04e6-48b5-bec0-bf54444be5f0.lance
+++ b/java/core/src/test/resources/example_db/dataset_version.lance/data/a13f68ba-04e6-48b5-bec0-bf54444be5f0.lance
--- a/java/core/src/test/resources/example_db/new_empty_dataset.lance/_latest.manifest
+++ b/java/core/src/test/resources/example_db/new_empty_dataset.lance/_latest.manifest
--- a/java/core/src/test/resources/example_db/new_empty_dataset.lance/_transactions/0-15648e72-076f-4ef1-8b90-10d305b95b3b.txn
+++ b/java/core/src/test/resources/example_db/new_empty_dataset.lance/_transactions/0-15648e72-076f-4ef1-8b90-10d305b95b3b.txn
@@ -1 +0,0 @@
-$15648e72-076f-4ef1-8b90-10d305b95b3b<33>=id <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>*int3208name <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>*string08
--- a/java/core/src/test/resources/example_db/new_empty_dataset.lance/_versions/1.manifest
+++ b/java/core/src/test/resources/example_db/new_empty_dataset.lance/_versions/1.manifest
--- a/java/core/src/test/resources/example_db/test.lance/_latest.manifest
+++ b/java/core/src/test/resources/example_db/test.lance/_latest.manifest
--- a/java/core/src/test/resources/example_db/test.lance/_transactions/0-a3689caf-4f6b-4afc-a3c7-97af75661843.txn
+++ b/java/core/src/test/resources/example_db/test.lance/_transactions/0-a3689caf-4f6b-4afc-a3c7-97af75661843.txn
@@ -1 +0,0 @@
-$a3689caf-4f6b-4afc-a3c7-97af75661843<34>oitem <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>*string8price <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>*double80vector <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>*fixed_size_list:float:28
--- a/java/core/src/test/resources/example_db/test.lance/_transactions/1-3f0fa7b9-7311-4945-9b0f-57dff4c04ee2.txn
+++ b/java/core/src/test/resources/example_db/test.lance/_transactions/1-3f0fa7b9-7311-4945-9b0f-57dff4c04ee2.txn
--- a/java/core/src/test/resources/example_db/test.lance/_versions/1.manifest
+++ b/java/core/src/test/resources/example_db/test.lance/_versions/1.manifest
--- a/java/core/src/test/resources/example_db/test.lance/_versions/2.manifest
+++ b/java/core/src/test/resources/example_db/test.lance/_versions/2.manifest
--- a/java/core/src/test/resources/example_db/test.lance/data/cd209a1b-00e0-4adf-93b2-2547c866e1ef.lance
+++ b/java/core/src/test/resources/example_db/test.lance/data/cd209a1b-00e0-4adf-93b2-2547c866e1ef.lance
--- a/java/core/src/test/resources/example_db/write_stream.lance/_latest.manifest
+++ b/java/core/src/test/resources/example_db/write_stream.lance/_latest.manifest
--- a/java/core/src/test/resources/example_db/write_stream.lance/_transactions/0-ea2f0479-36d1-4302-908a-dae45b9eb443.txn
+++ b/java/core/src/test/resources/example_db/write_stream.lance/_transactions/0-ea2f0479-36d1-4302-908a-dae45b9eb443.txn
--- a/java/core/src/test/resources/example_db/write_stream.lance/_versions/1.manifest
+++ b/java/core/src/test/resources/example_db/write_stream.lance/_versions/1.manifest
--- a/java/core/src/test/resources/example_db/write_stream.lance/data/665ff491-6dc5-4496-b292-166ed5c2a309.lance
+++ b/java/core/src/test/resources/example_db/write_stream.lance/data/665ff491-6dc5-4496-b292-166ed5c2a309.lance
--- a/java/pom.xml
+++ b/java/pom.xml
@@ -1,129 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<project xmlns="http://maven.apache.org/POM/4.0.0"
-    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
-    xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
-    <modelVersion>4.0.0</modelVersion>
-
-    <groupId>com.lancedb</groupId>
-    <artifactId>lancedb-parent</artifactId>
-    <version>0.1-SNAPSHOT</version>
-    <packaging>pom</packaging>
-
-    <name>Lance Parent</name>
-
-    <properties>
-        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
-        <maven.compiler.source>11</maven.compiler.source>
-        <maven.compiler.target>11</maven.compiler.target>
-        <arrow.version>15.0.0</arrow.version>
-    </properties>
-
-    <modules>
-        <module>core</module>
-    </modules>
-
-    <dependencyManagement>
-        <dependencies>
-            <dependency>
-                <groupId>org.apache.arrow</groupId>
-                <artifactId>arrow-vector</artifactId>
-                <version>${arrow.version}</version>
-            </dependency>
-            <dependency>
-                <groupId>org.apache.arrow</groupId>
-                <artifactId>arrow-memory-netty</artifactId>
-                <version>${arrow.version}</version>
-            </dependency>
-            <dependency>
-                <groupId>org.apache.arrow</groupId>
-                <artifactId>arrow-c-data</artifactId>
-                <version>${arrow.version}</version>
-            </dependency>
-            <dependency>
-                <groupId>org.apache.arrow</groupId>
-                <artifactId>arrow-dataset</artifactId>
-                <version>${arrow.version}</version>
-            </dependency>
-            <dependency>
-                <groupId>org.questdb</groupId>
-                <artifactId>jar-jni</artifactId>
-                <version>1.1.1</version>
-            </dependency>
-            <dependency>
-                <groupId>org.junit.jupiter</groupId>
-                <artifactId>junit-jupiter</artifactId>
-                <version>5.10.1</version>
-            </dependency>
-            <dependency>
-                <groupId>org.json</groupId>
-                <artifactId>json</artifactId>
-                <version>20210307</version>
-            </dependency>
-        </dependencies>
-    </dependencyManagement>
-
-     <build>
-        <plugins>
-            <plugin>
-                <groupId>org.apache.maven.plugins</groupId>
-                <artifactId>maven-checkstyle-plugin</artifactId>
-                <version>3.3.1</version>
-                <configuration>
-                    <configLocation>google_checks.xml</configLocation>
-                    <consoleOutput>true</consoleOutput>
-                    <failsOnError>true</failsOnError>
-                    <violationSeverity>warning</violationSeverity>
-                    <linkXRef>false</linkXRef>
-                </configuration>
-                <executions>
-                    <execution>
-                        <id>validate</id>
-                        <phase>validate</phase>
-                        <goals>
-                            <goal>check</goal>
-                        </goals>
-                    </execution>
-                </executions>
-            </plugin>
-        </plugins>
-        <pluginManagement>
-            <plugins>
-                <plugin>
-                    <artifactId>maven-clean-plugin</artifactId>
-                    <version>3.1.0</version>
-                </plugin>
-                <plugin>
-                    <artifactId>maven-resources-plugin</artifactId>
-                    <version>3.0.2</version>
-                </plugin>
-                <plugin>
-                    <artifactId>maven-compiler-plugin</artifactId>
-                    <version>3.8.1</version>
-                    <configuration>
-                        <compilerArgs>
-                            <arg>-h</arg>
-                            <arg>target/headers</arg>
-                        </compilerArgs>
-                    </configuration>
-                </plugin>
-                <plugin>
-                    <artifactId>maven-surefire-plugin</artifactId>
-                    <version>3.2.5</version>
-                    <configuration>
-                        <argLine>--add-opens=java.base/java.nio=ALL-UNNAMED</argLine>
-                        <forkNode implementation="org.apache.maven.plugin.surefire.extensions.SurefireForkNodeFactory"/>
-                        <useSystemClassLoader>false</useSystemClassLoader>
-                    </configuration>
-                </plugin>
-                <plugin>
-                    <artifactId>maven-jar-plugin</artifactId>
-                    <version>3.0.2</version>
-                </plugin>
-                <plugin>
-                    <artifactId>maven-install-plugin</artifactId>
-                    <version>2.5.2</version>
-                </plugin>
-            </plugins>
-        </pluginManagement>
-    </build>
-</project>
--- a/node/package-lock.json
+++ b/node/package-lock.json
@@ -1,12 +1,12 @@
 {
  "name": "vectordb",
-  "version": "0.5.0",
+  "version": "0.4.20",
  "lockfileVersion": 3,
  "requires": true,
  "packages": {
    "": {
      "name": "vectordb",
-      "version": "0.5.0",
+      "version": "0.4.20",
      "cpu": [
        "x64",
        "arm64"
--- a/node/package.json
+++ b/node/package.json
@@ -1,6 +1,6 @@
 {
  "name": "vectordb",
-  "version": "0.5.0",
+  "version": "0.4.20",
  "description": " Serverless, low-latency vector database for AI applications",
  "main": "dist/index.js",
  "types": "dist/index.d.ts",
--- a/node/src/arrow.ts
+++ b/node/src/arrow.ts
@@ -624,6 +624,8 @@ function validateSchemaEmbeddings(
  }

  if (missingEmbeddingFields.length > 0 && embeddings === undefined) {
+    console.log({ missingEmbeddingFields, embeddings });
+
    throw new Error(
      `Table has embeddings: "${missingEmbeddingFields
        .map((f) => f.name)
@@ -631,5 +633,5 @@ function validateSchemaEmbeddings(
    );
  }

-  return new Schema(fields, schema.metadata);
+  return new Schema(fields);
 }
--- a/nodejs/test/arrow.test.ts
+++ b/nodejs/test/arrow.test.ts
@@ -31,7 +31,6 @@ import {
  Schema,
  Struct,
  type Table,
-  Type,
  Utf8,
  tableFromIPC,
 } from "apache-arrow";
@@ -52,12 +51,7 @@ import {
  makeArrowTable,
  makeEmptyTable,
 } from "../lancedb/arrow";
-import {
-  EmbeddingFunction,
-  FieldOptions,
-  FunctionOptions,
-} from "../lancedb/embedding/embedding_function";
-import { EmbeddingFunctionConfig } from "../lancedb/embedding/registry";
+import { type EmbeddingFunction } from "../lancedb/embedding/embedding_function";

 // biome-ignore lint/suspicious/noExplicitAny: skip
 function sampleRecords(): Array<Record<string, any>> {
@@ -286,46 +280,23 @@ describe("The function makeArrowTable", function () {
  });
 });

-class DummyEmbedding extends EmbeddingFunction<string> {
-  toJSON(): Partial<FunctionOptions> {
-    return {};
-  }
+class DummyEmbedding implements EmbeddingFunction<string> {
+  public readonly sourceColumn = "string";
+  public readonly embeddingDimension = 2;
+  public readonly embeddingDataType = new Float16();

-  async computeSourceEmbeddings(data: string[]): Promise<number[][]> {
-    return data.map(() => [0.0, 0.0]);
-  }
-
-  ndims(): number {
-    return 2;
-  }
-
-  embeddingDataType() {
-    return new Float16();
-  }
-}
-
-class DummyEmbeddingWithNoDimension extends EmbeddingFunction<string> {
-  toJSON(): Partial<FunctionOptions> {
-    return {};
-  }
-
-  embeddingDataType(): Float {
-    return new Float16();
-  }
-
-  async computeSourceEmbeddings(data: string[]): Promise<number[][]> {
+  async embed(data: string[]): Promise<number[][]> {
    return data.map(() => [0.0, 0.0]);
  }
 }
-const dummyEmbeddingConfig: EmbeddingFunctionConfig = {
-  sourceColumn: "string",
-  function: new DummyEmbedding(),
-};

-const dummyEmbeddingConfigWithNoDimension: EmbeddingFunctionConfig = {
-  sourceColumn: "string",
-  function: new DummyEmbeddingWithNoDimension(),
-};
+class DummyEmbeddingWithNoDimension implements EmbeddingFunction<string> {
+  public readonly sourceColumn = "string";
+
+  async embed(data: string[]): Promise<number[][]> {
+    return data.map(() => [0.0, 0.0]);
+  }
+}

 describe("convertToTable", function () {
  it("will infer data types correctly", async function () {
@@ -360,7 +331,7 @@ describe("convertToTable", function () {

  it("will apply embeddings", async function () {
    const records = sampleRecords();
-    const table = await convertToTable(records, dummyEmbeddingConfig);
+    const table = await convertToTable(records, new DummyEmbedding());
    expect(DataType.isFixedSizeList(table.getChild("vector")?.type)).toBe(true);
    expect(table.getChild("vector")?.type.children[0].type.toString()).toEqual(
      new Float16().toString(),
@@ -369,7 +340,7 @@ describe("convertToTable", function () {

  it("will fail if missing the embedding source column", async function () {
    await expect(
-      convertToTable([{ id: 1 }], dummyEmbeddingConfig),
+      convertToTable([{ id: 1 }], new DummyEmbedding()),
    ).rejects.toThrow("'string' was not present");
  });

@@ -380,7 +351,7 @@ describe("convertToTable", function () {
    const table = makeEmptyTable(schema);

    // If the embedding specifies the dimension we are fine
-    await fromTableToBuffer(table, dummyEmbeddingConfig);
+    await fromTableToBuffer(table, new DummyEmbedding());

    // We can also supply a schema and should be ok
    const schemaWithEmbedding = new Schema([
@@ -393,13 +364,13 @@ describe("convertToTable", function () {
    ]);
    await fromTableToBuffer(
      table,
-      dummyEmbeddingConfigWithNoDimension,
+      new DummyEmbeddingWithNoDimension(),
      schemaWithEmbedding,
    );

    // Otherwise we will get an error
    await expect(
-      fromTableToBuffer(table, dummyEmbeddingConfigWithNoDimension),
+      fromTableToBuffer(table, new DummyEmbeddingWithNoDimension()),
    ).rejects.toThrow("does not specify `embeddingDimension`");
  });

@@ -412,7 +383,7 @@ describe("convertToTable", function () {
        false,
      ),
    ]);
-    const table = await convertToTable([], dummyEmbeddingConfig, { schema });
+    const table = await convertToTable([], new DummyEmbedding(), { schema });
    expect(DataType.isFixedSizeList(table.getChild("vector")?.type)).toBe(true);
    expect(table.getChild("vector")?.type.children[0].type.toString()).toEqual(
      new Float16().toString(),
@@ -422,17 +393,16 @@ describe("convertToTable", function () {
  it("will complain if embeddings present but schema missing embedding column", async function () {
    const schema = new Schema([new Field("string", new Utf8(), false)]);
    await expect(
-      convertToTable([], dummyEmbeddingConfig, { schema }),
+      convertToTable([], new DummyEmbedding(), { schema }),
    ).rejects.toThrow("column vector was missing");
  });

  it("will provide a nice error if run twice", async function () {
    const records = sampleRecords();
-    const table = await convertToTable(records, dummyEmbeddingConfig);
-
+    const table = await convertToTable(records, new DummyEmbedding());
    // fromTableToBuffer will try and apply the embeddings again
    await expect(
-      fromTableToBuffer(table, dummyEmbeddingConfig),
+      fromTableToBuffer(table, new DummyEmbedding()),
    ).rejects.toThrow("already existed");
  });
 });
--- a/nodejs/test/connection.test.ts
+++ b/nodejs/test/connection.test.ts
@@ -13,6 +13,7 @@
 // limitations under the License.

 import * as tmp from "tmp";
+
 import { Connection, connect } from "../lancedb";

 describe("when connecting", () => {
--- a/nodejs/test/registry.test.ts
+++ b/nodejs/test/registry.test.ts
@@ -1,169 +0,0 @@
-// Copyright 2024 Lance Developers.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-import * as arrow from "apache-arrow";
-import * as arrowOld from "apache-arrow-old";
-
-import * as tmp from "tmp";
-
-import { connect } from "../lancedb";
-import { EmbeddingFunction, LanceSchema } from "../lancedb/embedding";
-import { getRegistry, register } from "../lancedb/embedding/registry";
-
-describe.each([arrow, arrowOld])("LanceSchema", (arrow) => {
-  test("should preserve input order", async () => {
-    const schema = LanceSchema({
-      id: new arrow.Int32(),
-      text: new arrow.Utf8(),
-      vector: new arrow.Float32(),
-    });
-    expect(schema.fields.map((x) => x.name)).toEqual(["id", "text", "vector"]);
-  });
-});
-
-describe("Registry", () => {
-  let tmpDir: tmp.DirResult;
-  beforeEach(() => {
-    tmpDir = tmp.dirSync({ unsafeCleanup: true });
-  });
-
-  afterEach(() => {
-    tmpDir.removeCallback();
-    getRegistry().reset();
-  });
-
-  it("should register a new item to the registry", async () => {
-    @register("mock-embedding")
-    class MockEmbeddingFunction extends EmbeddingFunction<string> {
-      toJSON(): object {
-        return {
-          someText: "hello",
-        };
-      }
-      constructor() {
-        super();
-      }
-      ndims() {
-        return 3;
-      }
-      embeddingDataType(): arrow.Float {
-        return new arrow.Float32();
-      }
-      async computeSourceEmbeddings(data: string[]) {
-        return data.map(() => [1, 2, 3]);
-      }
-    }
-    const func = getRegistry()
-      .get<MockEmbeddingFunction>("mock-embedding")!
-      .create();
-
-    const schema = LanceSchema({
-      id: new arrow.Int32(),
-      text: func.sourceField(new arrow.Utf8()),
-      vector: func.vectorField(),
-    });
-
-    const db = await connect(tmpDir.name);
-    const table = await db.createTable(
-      "test",
-      [
-        { id: 1, text: "hello" },
-        { id: 2, text: "world" },
-      ],
-      { schema },
-    );
-    const expected = [
-      [1, 2, 3],
-      [1, 2, 3],
-    ];
-    const actual = await table.query().toArrow();
-    const vectors = actual
-      .getChild("vector")
-      ?.toArray()
-      .map((x: unknown) => {
-        if (x instanceof arrow.Vector) {
-          return [...x];
-        } else {
-          return x;
-        }
-      });
-    expect(vectors).toEqual(expected);
-  });
-  test("should error if registering with the same name", async () => {
-    class MockEmbeddingFunction extends EmbeddingFunction<string> {
-      toJSON(): object {
-        return {
-          someText: "hello",
-        };
-      }
-      constructor() {
-        super();
-      }
-      ndims() {
-        return 3;
-      }
-      embeddingDataType(): arrow.Float {
-        return new arrow.Float32();
-      }
-      async computeSourceEmbeddings(data: string[]) {
-        return data.map(() => [1, 2, 3]);
-      }
-    }
-    register("mock-embedding")(MockEmbeddingFunction);
-    expect(() => register("mock-embedding")(MockEmbeddingFunction)).toThrow(
-      'Embedding function with alias "mock-embedding" already exists',
-    );
-  });
-  test("schema should contain correct metadata", async () => {
-    class MockEmbeddingFunction extends EmbeddingFunction<string> {
-      toJSON(): object {
-        return {
-          someText: "hello",
-        };
-      }
-      constructor() {
-        super();
-      }
-      ndims() {
-        return 3;
-      }
-      embeddingDataType(): arrow.Float {
-        return new arrow.Float32();
-      }
-      async computeSourceEmbeddings(data: string[]) {
-        return data.map(() => [1, 2, 3]);
-      }
-    }
-    const func = new MockEmbeddingFunction();
-
-    const schema = LanceSchema({
-      id: new arrow.Int32(),
-      text: func.sourceField(new arrow.Utf8()),
-      vector: func.vectorField(),
-    });
-    const expectedMetadata = new Map<string, string>([
-      [
-        "embedding_functions",
-        JSON.stringify([
-          {
-            sourceColumn: "text",
-            vectorColumn: "vector",
-            name: "MockEmbeddingFunction",
-            model: { someText: "hello" },
-          },
-        ]),
-      ],
-    ]);
-    expect(schema.metadata).toEqual(expectedMetadata);
-  });
-});
--- a/nodejs/test/table.test.ts
+++ b/nodejs/test/table.test.ts
@@ -16,34 +16,23 @@ import * as fs from "fs";
 import * as path from "path";
 import * as tmp from "tmp";

-import * as arrow from "apache-arrow";
-import * as arrowOld from "apache-arrow-old";
-
-import { Table, connect } from "../lancedb";
 import {
  Field,
  FixedSizeList,
-  Float,
  Float32,
  Float64,
  Int32,
  Int64,
  Schema,
-  Utf8,
-  makeArrowTable,
-} from "../lancedb/arrow";
-import { EmbeddingFunction, LanceSchema } from "../lancedb/embedding";
-import { getRegistry, register } from "../lancedb/embedding/registry";
+} from "apache-arrow";
+import { Table, connect } from "../lancedb";
+import { makeArrowTable } from "../lancedb/arrow";
 import { Index } from "../lancedb/indices";

-// biome-ignore lint/suspicious/noExplicitAny: <explanation>
-describe.each([arrow, arrowOld])("Given a table", (arrow: any) => {
+describe("Given a table", () => {
  let tmpDir: tmp.DirResult;
  let table: Table;
-
-  const schema = new arrow.Schema([
-    new arrow.Field("id", new arrow.Float64(), true),
-  ]);
+  const schema = new Schema([new Field("id", new Float64(), true)]);
  beforeEach(async () => {
    tmpDir = tmp.dirSync({ unsafeCleanup: true });
    const conn = await connect(tmpDir.name);
@@ -430,186 +419,3 @@ describe("when dealing with versioning", () => {
    );
  });
 });
-
-describe("embedding functions", () => {
-  let tmpDir: tmp.DirResult;
-  beforeEach(() => {
-    tmpDir = tmp.dirSync({ unsafeCleanup: true });
-  });
-  afterEach(() => tmpDir.removeCallback());
-
-  it("should be able to create a table with an embedding function", async () => {
-    class MockEmbeddingFunction extends EmbeddingFunction<string> {
-      toJSON(): object {
-        return {};
-      }
-      ndims() {
-        return 3;
-      }
-      embeddingDataType(): Float {
-        return new Float32();
-      }
-      async computeQueryEmbeddings(_data: string) {
-        return [1, 2, 3];
-      }
-      async computeSourceEmbeddings(data: string[]) {
-        return Array.from({ length: data.length }).fill([
-          1, 2, 3,
-        ]) as number[][];
-      }
-    }
-    const func = new MockEmbeddingFunction();
-    const db = await connect(tmpDir.name);
-    const table = await db.createTable(
-      "test",
-      [
-        { id: 1, text: "hello" },
-        { id: 2, text: "world" },
-      ],
-      {
-        embeddingFunction: {
-          function: func,
-          sourceColumn: "text",
-        },
-      },
-    );
-    // biome-ignore lint/suspicious/noExplicitAny: test
-    const arr = (await table.query().toArray()) as any;
-    expect(arr[0].vector).toBeDefined();
-
-    // we round trip through JSON to make sure the vector properly gets converted to an array
-    // otherwise it'll be a TypedArray or Vector
-    const vector0 = JSON.parse(JSON.stringify(arr[0].vector));
-    expect(vector0).toEqual([1, 2, 3]);
-  });
-
-  it("should be able to create an empty table with an embedding function", async () => {
-    @register()
-    class MockEmbeddingFunction extends EmbeddingFunction<string> {
-      toJSON(): object {
-        return {};
-      }
-      ndims() {
-        return 3;
-      }
-      embeddingDataType(): Float {
-        return new Float32();
-      }
-      async computeQueryEmbeddings(_data: string) {
-        return [1, 2, 3];
-      }
-      async computeSourceEmbeddings(data: string[]) {
-        return Array.from({ length: data.length }).fill([
-          1, 2, 3,
-        ]) as number[][];
-      }
-    }
-    const schema = new Schema([
-      new Field("text", new Utf8(), true),
-      new Field(
-        "vector",
-        new FixedSizeList(3, new Field("item", new Float32(), true)),
-        true,
-      ),
-    ]);
-
-    const func = new MockEmbeddingFunction();
-    const db = await connect(tmpDir.name);
-    const table = await db.createEmptyTable("test", schema, {
-      embeddingFunction: {
-        function: func,
-        sourceColumn: "text",
-      },
-    });
-    const outSchema = await table.schema();
-    expect(outSchema.metadata.get("embedding_functions")).toBeDefined();
-    await table.add([{ text: "hello world" }]);
-
-    // biome-ignore lint/suspicious/noExplicitAny: test
-    const arr = (await table.query().toArray()) as any;
-    expect(arr[0].vector).toBeDefined();
-
-    // we round trip through JSON to make sure the vector properly gets converted to an array
-    // otherwise it'll be a TypedArray or Vector
-    const vector0 = JSON.parse(JSON.stringify(arr[0].vector));
-    expect(vector0).toEqual([1, 2, 3]);
-  });
-  it("should error when appending to a table with an unregistered embedding function", async () => {
-    @register("mock")
-    class MockEmbeddingFunction extends EmbeddingFunction<string> {
-      toJSON(): object {
-        return {};
-      }
-      ndims() {
-        return 3;
-      }
-      embeddingDataType(): Float {
-        return new Float32();
-      }
-      async computeQueryEmbeddings(_data: string) {
-        return [1, 2, 3];
-      }
-      async computeSourceEmbeddings(data: string[]) {
-        return Array.from({ length: data.length }).fill([
-          1, 2, 3,
-        ]) as number[][];
-      }
-    }
-    const func = getRegistry().get<MockEmbeddingFunction>("mock")!.create();
-
-    const schema = LanceSchema({
-      id: new arrow.Float64(),
-      text: func.sourceField(new Utf8()),
-      vector: func.vectorField(),
-    });
-
-    const db = await connect(tmpDir.name);
-    await db.createTable(
-      "test",
-      [
-        { id: 1, text: "hello" },
-        { id: 2, text: "world" },
-      ],
-      {
-        schema,
-      },
-    );
-
-    getRegistry().reset();
-    const db2 = await connect(tmpDir.name);
-
-    const tbl = await db2.openTable("test");
-
-    expect(tbl.add([{ id: 3, text: "hello" }])).rejects.toThrow(
-      `Function "mock" not found in registry`,
-    );
-  });
-});
-
-describe("when optimizing a dataset", () => {
-  let tmpDir: tmp.DirResult;
-  let table: Table;
-  beforeEach(async () => {
-    tmpDir = tmp.dirSync({ unsafeCleanup: true });
-    const con = await connect(tmpDir.name);
-    table = await con.createTable("vectors", [{ id: 1 }]);
-    await table.add([{ id: 2 }]);
-  });
-  afterEach(() => {
-    tmpDir.removeCallback();
-  });
-
-  it("compacts files", async () => {
-    const stats = await table.optimize();
-    expect(stats.compaction.filesAdded).toBe(1);
-    expect(stats.compaction.filesRemoved).toBe(2);
-    expect(stats.compaction.fragmentsAdded).toBe(1);
-    expect(stats.compaction.fragmentsRemoved).toBe(2);
-  });
-
-  it("cleanups old versions", async () => {
-    const stats = await table.optimize({ cleanupOlderThan: new Date() });
-    expect(stats.prune.bytesRemoved).toBeGreaterThan(0);
-    expect(stats.prune.oldVersionsRemoved).toBe(3);
-  });
-});
--- a/nodejs/biome.json
+++ b/nodejs/biome.json
@@ -48,7 +48,7 @@
        "noUnsafeFinally": "error",
        "noUnsafeOptionalChaining": "error",
        "noUnusedLabels": "error",
-        "noUnusedVariables": "warn",
+        "noUnusedVariables": "error",
        "useIsNan": "error",
        "useValidForDirection": "error",
        "useYield": "error"
@@ -101,13 +101,7 @@
  },
  "overrides": [
    {
-      "include": [
-        "**/*.ts",
-        "**/*.tsx",
-        "**/*.mts",
-        "**/*.cts",
-        "__test__/*.test.ts"
-      ],
+      "include": ["**/*.ts", "**/*.tsx", "**/*.mts", "**/*.cts"],
      "linter": {
        "rules": {
          "correctness": {
--- a/nodejs/lancedb/arrow.ts
+++ b/nodejs/lancedb/arrow.ts
@@ -17,14 +17,10 @@ import {
  Binary,
  DataType,
  Field,
-  FixedSizeBinary,
  FixedSizeList,
-  Float,
+  type Float,
  Float32,
-  Int,
-  LargeBinary,
  List,
-  Null,
  RecordBatch,
  RecordBatchFileWriter,
  RecordBatchStreamWriter,
@@ -38,99 +34,7 @@ import {
  vectorFromArray,
 } from "apache-arrow";
 import { type EmbeddingFunction } from "./embedding/embedding_function";
-import { EmbeddingFunctionConfig, getRegistry } from "./embedding/registry";
-import { sanitizeField, sanitizeSchema, sanitizeType } from "./sanitize";
-export * from "apache-arrow";
-
-export function isArrowTable(value: object): value is ArrowTable {
-  if (value instanceof ArrowTable) return true;
-  return "schema" in value && "batches" in value;
-}
-
-export function isDataType(value: unknown): value is DataType {
-  return (
-    value instanceof DataType ||
-    DataType.isNull(value) ||
-    DataType.isInt(value) ||
-    DataType.isFloat(value) ||
-    DataType.isBinary(value) ||
-    DataType.isLargeBinary(value) ||
-    DataType.isUtf8(value) ||
-    DataType.isLargeUtf8(value) ||
-    DataType.isBool(value) ||
-    DataType.isDecimal(value) ||
-    DataType.isDate(value) ||
-    DataType.isTime(value) ||
-    DataType.isTimestamp(value) ||
-    DataType.isInterval(value) ||
-    DataType.isDuration(value) ||
-    DataType.isList(value) ||
-    DataType.isStruct(value) ||
-    DataType.isUnion(value) ||
-    DataType.isFixedSizeBinary(value) ||
-    DataType.isFixedSizeList(value) ||
-    DataType.isMap(value) ||
-    DataType.isDictionary(value)
-  );
-}
-export function isNull(value: unknown): value is Null {
-  return value instanceof Null || DataType.isNull(value);
-}
-export function isInt(value: unknown): value is Int {
-  return value instanceof Int || DataType.isInt(value);
-}
-export function isFloat(value: unknown): value is Float {
-  return value instanceof Float || DataType.isFloat(value);
-}
-export function isBinary(value: unknown): value is Binary {
-  return value instanceof Binary || DataType.isBinary(value);
-}
-export function isLargeBinary(value: unknown): value is LargeBinary {
-  return value instanceof LargeBinary || DataType.isLargeBinary(value);
-}
-export function isUtf8(value: unknown): value is Utf8 {
-  return value instanceof Utf8 || DataType.isUtf8(value);
-}
-export function isLargeUtf8(value: unknown): value is Utf8 {
-  return value instanceof Utf8 || DataType.isLargeUtf8(value);
-}
-export function isBool(value: unknown): value is Utf8 {
-  return value instanceof Utf8 || DataType.isBool(value);
-}
-export function isDecimal(value: unknown): value is Utf8 {
-  return value instanceof Utf8 || DataType.isDecimal(value);
-}
-export function isDate(value: unknown): value is Utf8 {
-  return value instanceof Utf8 || DataType.isDate(value);
-}
-export function isTime(value: unknown): value is Utf8 {
-  return value instanceof Utf8 || DataType.isTime(value);
-}
-export function isTimestamp(value: unknown): value is Utf8 {
-  return value instanceof Utf8 || DataType.isTimestamp(value);
-}
-export function isInterval(value: unknown): value is Utf8 {
-  return value instanceof Utf8 || DataType.isInterval(value);
-}
-export function isDuration(value: unknown): value is Utf8 {
-  return value instanceof Utf8 || DataType.isDuration(value);
-}
-export function isList(value: unknown): value is List {
-  return value instanceof List || DataType.isList(value);
-}
-export function isStruct(value: unknown): value is Struct {
-  return value instanceof Struct || DataType.isStruct(value);
-}
-export function isUnion(value: unknown): value is Struct {
-  return value instanceof Struct || DataType.isUnion(value);
-}
-export function isFixedSizeBinary(value: unknown): value is FixedSizeBinary {
-  return value instanceof FixedSizeBinary || DataType.isFixedSizeBinary(value);
-}
-
-export function isFixedSizeList(value: unknown): value is FixedSizeList {
-  return value instanceof FixedSizeList || DataType.isFixedSizeList(value);
-}
+import { sanitizeSchema } from "./sanitize";

 /** Data type accepted by NodeJS SDK */
 export type Data = Record<string, unknown>[] | ArrowTable;
@@ -294,7 +198,6 @@ export class MakeArrowTableOptions {
 export function makeArrowTable(
  data: Array<Record<string, unknown>>,
  options?: Partial<MakeArrowTableOptions>,
-  metadata?: Map<string, string>,
 ): ArrowTable {
  if (
    data.length === 0 &&
@@ -387,41 +290,20 @@ export function makeArrowTable(
    // `new ArrowTable(schema, batches)` which does not do any schema inference
    const firstTable = new ArrowTable(columns);
    const batchesFixed = firstTable.batches.map(
+      // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
      (batch) => new RecordBatch(opt.schema!, batch.data),
    );
-    let schema: Schema;
-    if (metadata !== undefined) {
-      let schemaMetadata = opt.schema.metadata;
-      if (schemaMetadata.size === 0) {
-        schemaMetadata = metadata;
-      } else {
-        for (const [key, entry] of schemaMetadata.entries()) {
-          schemaMetadata.set(key, entry);
-        }
-      }
-
-      schema = new Schema(opt.schema.fields, schemaMetadata);
-    } else {
-      schema = opt.schema;
-    }
-    return new ArrowTable(schema, batchesFixed);
+    return new ArrowTable(opt.schema, batchesFixed);
+  } else {
+    return new ArrowTable(columns);
  }
-  const tbl = new ArrowTable(columns);
-  if (metadata !== undefined) {
-    // biome-ignore lint/suspicious/noExplicitAny: <explanation>
-    (<any>tbl.schema).metadata = metadata;
-  }
-  return tbl;
 }

 /**
 * Create an empty Arrow table with the provided schema
 */
-export function makeEmptyTable(
-  schema: Schema,
-  metadata?: Map<string, string>,
-): ArrowTable {
-  return makeArrowTable([], { schema }, metadata);
+export function makeEmptyTable(schema: Schema): ArrowTable {
+  return makeArrowTable([], { schema });
 }

 /**
@@ -493,75 +375,13 @@ function makeVector(
  }
 }

-/** Helper function to apply embeddings from metadata to an input table */
-async function applyEmbeddingsFromMetadata(
-  table: ArrowTable,
-  schema: Schema,
-): Promise<ArrowTable> {
-  const registry = getRegistry();
-  const functions = registry.parseFunctions(schema.metadata);
-
-  const columns = Object.fromEntries(
-    table.schema.fields.map((field) => [
-      field.name,
-      table.getChild(field.name)!,
-    ]),
-  );
-
-  for (const functionEntry of functions.values()) {
-    const sourceColumn = columns[functionEntry.sourceColumn];
-    const destColumn = functionEntry.vectorColumn ?? "vector";
-    if (sourceColumn === undefined) {
-      throw new Error(
-        `Cannot apply embedding function because the source column '${functionEntry.sourceColumn}' was not present in the data`,
-      );
-    }
-    if (columns[destColumn] !== undefined) {
-      throw new Error(
-        `Attempt to apply embeddings to table failed because column ${destColumn} already existed`,
-      );
-    }
-    if (table.batches.length > 1) {
-      throw new Error(
-        "Internal error: `makeArrowTable` unexpectedly created a table with more than one batch",
-      );
-    }
-    const values = sourceColumn.toArray();
-
-    const vectors =
-      await functionEntry.function.computeSourceEmbeddings(values);
-    if (vectors.length !== values.length) {
-      throw new Error(
-        "Embedding function did not return an embedding for each input element",
-      );
-    }
-    let destType: DataType;
-    const dtype = schema.fields.find((f) => f.name === destColumn)!.type;
-    if (isFixedSizeList(dtype)) {
-      destType = sanitizeType(dtype);
-    } else {
-      throw new Error(
-        "Expected FixedSizeList as datatype for vector field, instead got: " +
-          dtype,
-      );
-    }
-
-    const vector = makeVector(vectors, destType);
-    columns[destColumn] = vector;
-  }
-  const newTable = new ArrowTable(columns);
-  return alignTable(newTable, schema);
-}
-
 /** Helper function to apply embeddings to an input table */
 async function applyEmbeddings<T>(
  table: ArrowTable,
-  embeddings?: EmbeddingFunctionConfig,
+  embeddings?: EmbeddingFunction<T>,
  schema?: Schema,
 ): Promise<ArrowTable> {
-  if (schema?.metadata.has("embedding_functions")) {
-    return applyEmbeddingsFromMetadata(table, schema!);
-  } else if (embeddings == null || embeddings === undefined) {
+  if (embeddings == null) {
    return table;
  }

@@ -579,9 +399,8 @@ async function applyEmbeddings<T>(
  const newColumns = Object.fromEntries(colEntries);

  const sourceColumn = newColumns[embeddings.sourceColumn];
-  const destColumn = embeddings.vectorColumn ?? "vector";
-  const innerDestType =
-    embeddings.function.embeddingDataType() ?? new Float32();
+  const destColumn = embeddings.destColumn ?? "vector";
+  const innerDestType = embeddings.embeddingDataType ?? new Float32();
  if (sourceColumn === undefined) {
    throw new Error(
      `Cannot apply embedding function because the source column '${embeddings.sourceColumn}' was not present in the data`,
@@ -595,9 +414,11 @@ async function applyEmbeddings<T>(
      // if we call convertToTable with 0 records and a schema that includes the embedding
      return table;
    }
-    const dimensions = embeddings.function.ndims();
-    if (dimensions !== undefined) {
-      const destType = newVectorType(dimensions, innerDestType);
+    if (embeddings.embeddingDimension !== undefined) {
+      const destType = newVectorType(
+        embeddings.embeddingDimension,
+        innerDestType,
+      );
      newColumns[destColumn] = makeVector([], destType);
    } else if (schema != null) {
      const destField = schema.fields.find((f) => f.name === destColumn);
@@ -625,9 +446,7 @@ async function applyEmbeddings<T>(
      );
    }
    const values = sourceColumn.toArray();
-    const vectors = await embeddings.function.computeSourceEmbeddings(
-      values as T[],
-    );
+    const vectors = await embeddings.embed(values as T[]);
    if (vectors.length !== values.length) {
      throw new Error(
        "Embedding function did not return an embedding for each input element",
@@ -667,9 +486,9 @@ async function applyEmbeddings<T>(
 * embedding columns.  If no schema is provded then embedding columns will
 * be placed at the end of the table, after all of the input columns.
 */
-export async function convertToTable(
+export async function convertToTable<T>(
  data: Array<Record<string, unknown>>,
-  embeddings?: EmbeddingFunctionConfig,
+  embeddings?: EmbeddingFunction<T>,
  makeTableOptions?: Partial<MakeArrowTableOptions>,
 ): Promise<ArrowTable> {
  const table = makeArrowTable(data, makeTableOptions);
@@ -677,13 +496,13 @@ export async function convertToTable(
 }

 /** Creates the Arrow Type for a Vector column with dimension `dim` */
-export function newVectorType<T extends Float>(
+function newVectorType<T extends Float>(
  dim: number,
  innerType: T,
 ): FixedSizeList<T> {
  // in Lance we always default to have the elements nullable, so we need to set it to true
  // otherwise we often get schema mismatches because the stored data always has schema with nullable elements
-  const children = new Field("item", <T>sanitizeType(innerType), true);
+  const children = new Field<T>("item", innerType, true);
  return new FixedSizeList(dim, children);
 }

@@ -694,9 +513,9 @@ export function newVectorType<T extends Float>(
 *
 * `schema` is required if data is empty
 */
-export async function fromRecordsToBuffer(
+export async function fromRecordsToBuffer<T>(
  data: Array<Record<string, unknown>>,
-  embeddings?: EmbeddingFunctionConfig,
+  embeddings?: EmbeddingFunction<T>,
  schema?: Schema,
 ): Promise<Buffer> {
  if (schema !== undefined && schema !== null) {
@@ -714,9 +533,9 @@ export async function fromRecordsToBuffer(
 *
 * `schema` is required if data is empty
 */
-export async function fromRecordsToStreamBuffer(
+export async function fromRecordsToStreamBuffer<T>(
  data: Array<Record<string, unknown>>,
-  embeddings?: EmbeddingFunctionConfig,
+  embeddings?: EmbeddingFunction<T>,
  schema?: Schema,
 ): Promise<Buffer> {
  if (schema !== undefined && schema !== null) {
@@ -735,9 +554,9 @@ export async function fromRecordsToStreamBuffer(
 *
 * `schema` is required if the table is empty
 */
-export async function fromTableToBuffer(
+export async function fromTableToBuffer<T>(
  table: ArrowTable,
-  embeddings?: EmbeddingFunctionConfig,
+  embeddings?: EmbeddingFunction<T>,
  schema?: Schema,
 ): Promise<Buffer> {
  if (schema !== undefined && schema !== null) {
@@ -756,19 +575,19 @@ export async function fromTableToBuffer(
 *
 * `schema` is required if the table is empty
 */
-export async function fromDataToBuffer(
+export async function fromDataToBuffer<T>(
  data: Data,
-  embeddings?: EmbeddingFunctionConfig,
+  embeddings?: EmbeddingFunction<T>,
  schema?: Schema,
 ): Promise<Buffer> {
  if (schema !== undefined && schema !== null) {
    schema = sanitizeSchema(schema);
  }
-  if (isArrowTable(data)) {
+  if (data instanceof ArrowTable) {
    return fromTableToBuffer(data, embeddings, schema);
  } else {
-    const table = await convertToTable(data, embeddings, { schema });
-    return fromTableToBuffer(table);
+    const table = await convertToTable(data);
+    return fromTableToBuffer(table, embeddings, schema);
  }
 }

@@ -780,9 +599,9 @@ export async function fromDataToBuffer(
 *
 * `schema` is required if the table is empty
 */
-export async function fromTableToStreamBuffer(
+export async function fromTableToStreamBuffer<T>(
  table: ArrowTable,
-  embeddings?: EmbeddingFunctionConfig,
+  embeddings?: EmbeddingFunction<T>,
  schema?: Schema,
 ): Promise<Buffer> {
  const tableWithEmbeddings = await applyEmbeddings(table, embeddings, schema);
@@ -845,25 +664,10 @@ function validateSchemaEmbeddings(
  // if it does not, we add it to the list of missing embedding fields
  // Finally, we check if those missing embedding fields are `this._embeddings`
  // if they are not, we throw an error
-  for (let field of schema.fields) {
-    if (isFixedSizeList(field.type)) {
-      field = sanitizeField(field);
-
+  for (const field of schema.fields) {
+    if (field.type instanceof FixedSizeList) {
      if (data.length !== 0 && data?.[0]?.[field.name] === undefined) {
-        if (schema.metadata.has("embedding_functions")) {
-          const embeddings = JSON.parse(
-            schema.metadata.get("embedding_functions")!,
-          );
-          if (
-            // biome-ignore lint/suspicious/noExplicitAny: we don't know the type of `f`
-            embeddings.find((f: any) => f["vectorColumn"] === field.name) ===
-            undefined
-          ) {
-            missingEmbeddingFields.push(field);
-          }
-        } else {
-          missingEmbeddingFields.push(field);
-        }
+        missingEmbeddingFields.push(field);
      } else {
        fields.push(field);
      }
@@ -873,6 +677,8 @@ function validateSchemaEmbeddings(
  }

  if (missingEmbeddingFields.length > 0 && embeddings === undefined) {
+    console.log({ missingEmbeddingFields, embeddings });
+
    throw new Error(
      `Table has embeddings: "${missingEmbeddingFields
        .map((f) => f.name)
@@ -880,5 +686,5 @@ function validateSchemaEmbeddings(
    );
  }

-  return new Schema(fields, schema.metadata);
+  return new Schema(fields);
 }
--- a/nodejs/lancedb/connection.ts
+++ b/nodejs/lancedb/connection.ts
@@ -12,14 +12,8 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-import { Table as ArrowTable, Schema } from "./arrow";
-import {
-  fromTableToBuffer,
-  isArrowTable,
-  makeArrowTable,
-  makeEmptyTable,
-} from "./arrow";
-import { EmbeddingFunctionConfig, getRegistry } from "./embedding/registry";
+import { Table as ArrowTable, Schema } from "apache-arrow";
+import { fromTableToBuffer, makeArrowTable, makeEmptyTable } from "./arrow";
 import { ConnectionOptions, Connection as LanceDbConnection } from "./native";
 import { Table } from "./table";

@@ -71,8 +65,6 @@ export interface CreateTableOptions {
   * The available options are described at https://lancedb.github.io/lancedb/guides/storage/
   */
  storageOptions?: Record<string, string>;
-  schema?: Schema;
-  embeddingFunction?: EmbeddingFunctionConfig;
 }

 export interface OpenTableOptions {
@@ -182,7 +174,6 @@ export class Connection {
      cleanseStorageOptions(options?.storageOptions),
      options?.indexCacheSize,
    );
-
    return new Table(innerTable);
  }

@@ -205,24 +196,18 @@ export class Connection {
    }

    let table: ArrowTable;
-    if (isArrowTable(data)) {
+    if (data instanceof ArrowTable) {
      table = data;
    } else {
-      table = makeArrowTable(data, options);
+      table = makeArrowTable(data);
    }
-
-    const buf = await fromTableToBuffer(
-      table,
-      options?.embeddingFunction,
-      options?.schema,
-    );
+    const buf = await fromTableToBuffer(table);
    const innerTable = await this.inner.createTable(
      name,
      buf,
      mode,
      cleanseStorageOptions(options?.storageOptions),
    );
-
    return new Table(innerTable);
  }

@@ -242,14 +227,8 @@ export class Connection {
    if (mode === "create" && existOk) {
      mode = "exist_ok";
    }
-    let metadata: Map<string, string> | undefined = undefined;
-    if (options?.embeddingFunction !== undefined) {
-      const embeddingFunction = options.embeddingFunction;
-      const registry = getRegistry();
-      metadata = registry.getTableMetadata([embeddingFunction]);
-    }

-    const table = makeEmptyTable(schema, metadata);
+    const table = makeEmptyTable(schema);
    const buf = await fromTableToBuffer(table);
    const innerTable = await this.inner.createEmptyTable(
      name,
--- a/nodejs/lancedb/embedding/embedding_function.ts
+++ b/nodejs/lancedb/embedding/embedding_function.ts
@@ -1,4 +1,4 @@
-// Copyright 2024 Lance Developers.
+// Copyright 2023 Lance Developers.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -12,151 +12,67 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-import "reflect-metadata";
-import {
-  DataType,
-  Field,
-  FixedSizeList,
-  Float,
-  Float32,
-  isDataType,
-  isFixedSizeList,
-  isFloat,
-  newVectorType,
-} from "../arrow";
-import { sanitizeType } from "../sanitize";
-
-/**
- * Options for a given embedding function
- */
-export interface FunctionOptions {
-  // biome-ignore lint/suspicious/noExplicitAny: options can be anything
-  [key: string]: any;
-}
+import { type Float } from "apache-arrow";

 /**
 * An embedding function that automatically creates vector representation for a given column.
 */
-export abstract class EmbeddingFunction<
-  // biome-ignore lint/suspicious/noExplicitAny: we don't know what the implementor will do
-  T = any,
-  M extends FunctionOptions = FunctionOptions,
-> {
+export interface EmbeddingFunction<T> {
  /**
-   * Convert the embedding function to a JSON object
-   * It is used to serialize the embedding function to the schema
-   * It's important that any object returned by this method contains all the necessary
-   * information to recreate the embedding function
-   *
-   * It should return the same object that was passed to the constructor
-   * If it does not, the embedding function will not be able to be recreated, or could be recreated incorrectly
-   *
-   * @example
-   * ```ts
-   * class MyEmbeddingFunction extends EmbeddingFunction {
-   *   constructor(options: {model: string, timeout: number}) {
-   *     super();
-   *     this.model = options.model;
-   *     this.timeout = options.timeout;
-   *   }
-   *   toJSON() {
-   *     return {
-   *       model: this.model,
-   *       timeout: this.timeout,
-   *     };
-   * }
-   * ```
+   * The name of the column that will be used as input for the Embedding Function.
   */
-  abstract toJSON(): Partial<M>;
+  sourceColumn: string;

  /**
-   * sourceField is used in combination with `LanceSchema` to provide a declarative data model
+   * The data type of the embedding
   *
-   * @param optionsOrDatatype - The options for the field or the datatype
-   *
-   * @see {@link lancedb.LanceSchema}
+   * The embedding function should return `number`.  This will be converted into
+   * an Arrow float array.  By default this will be Float32 but this property can
+   * be used to control the conversion.
   */
-  sourceField(
-    optionsOrDatatype: Partial<FieldOptions> | DataType,
-  ): [DataType, Map<string, EmbeddingFunction>] {
-    let datatype = isDataType(optionsOrDatatype)
-      ? optionsOrDatatype
-      : optionsOrDatatype?.datatype;
-    if (!datatype) {
-      throw new Error("Datatype is required");
-    }
-    datatype = sanitizeType(datatype);
-    const metadata = new Map<string, EmbeddingFunction>();
-    metadata.set("source_column_for", this);
-
-    return [datatype, metadata];
-  }
+  embeddingDataType?: Float;

  /**
-   * vectorField is used in combination with `LanceSchema` to provide a declarative data model
+   * The dimension of the embedding
   *
-   * @param options - The options for the field
-   *
-   * @see {@link lancedb.LanceSchema}
+   * This is optional, normally this can be determined by looking at the results of
+   * `embed`.  If this is not specified, and there is an attempt to apply the embedding
+   * to an empty table, then that process will fail.
   */
-  vectorField(
-    options?: Partial<FieldOptions>,
-  ): [DataType, Map<string, EmbeddingFunction>] {
-    let dtype: DataType;
-    const dims = this.ndims() ?? options?.dims;
-    if (!options?.datatype) {
-      if (dims === undefined) {
-        throw new Error("ndims is required for vector field");
-      }
-      dtype = new FixedSizeList(dims, new Field("item", new Float32(), true));
-    } else {
-      if (isFixedSizeList(options.datatype)) {
-        dtype = options.datatype;
-      } else if (isFloat(options.datatype)) {
-        if (dims === undefined) {
-          throw new Error("ndims is required for vector field");
-        }
-        dtype = newVectorType(dims, options.datatype);
-      } else {
-        throw new Error(
-          "Expected FixedSizeList or Float as datatype for vector field",
-        );
-      }
-    }
-    const metadata = new Map<string, EmbeddingFunction>();
-    metadata.set("vector_column_for", this);
+  embeddingDimension?: number;

-    return [dtype, metadata];
-  }
+  /**
+   * The name of the column that will contain the embedding
+   *
+   * By default this is "vector"
+   */
+  destColumn?: string;

-  /** The number of dimensions of the embeddings */
-  ndims(): number | undefined {
-    return undefined;
-  }
-
-  /** The datatype of the embeddings */
-  abstract embeddingDataType(): Float;
+  /**
+   * Should the source column be excluded from the resulting table
+   *
+   * By default the source column is included.  Set this to true and
+   * only the embedding will be stored.
+   */
+  excludeSource?: boolean;

  /**
   * Creates a vector representation for the given values.
   */
-  abstract computeSourceEmbeddings(
-    data: T[],
-  ): Promise<number[][] | Float32Array[] | Float64Array[]>;
+  embed: (data: T[]) => Promise<number[][]>;
+}

-  /**
-  Compute the embeddings for a single query
- */
-  async computeQueryEmbeddings(
-    data: T,
-  ): Promise<number[] | Float32Array | Float64Array> {
-    return this.computeSourceEmbeddings([data]).then(
-      (embeddings) => embeddings[0],
-    );
+/** Test if the input seems to be an embedding function */
+export function isEmbeddingFunction<T>(
+  value: unknown,
+): value is EmbeddingFunction<T> {
+  if (typeof value !== "object" || value === null) {
+    return false;
  }
-}
-
-export interface FieldOptions<T extends DataType = DataType> {
-  datatype: T;
-  dims?: number;
+  if (!("sourceColumn" in value) || !("embed" in value)) {
+    return false;
+  }
+  return (
+    typeof value.sourceColumn === "string" && typeof value.embed === "function"
+  );
 }
--- a/nodejs/lancedb/embedding/index.ts
+++ b/nodejs/lancedb/embedding/index.ts
@@ -1,113 +1,2 @@
-// Copyright 2023 Lance Developers.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-import { DataType, Field, Schema } from "../arrow";
-import { isDataType } from "../arrow";
-import { sanitizeType } from "../sanitize";
-import { EmbeddingFunction } from "./embedding_function";
-import { EmbeddingFunctionConfig, getRegistry } from "./registry";
-
-export { EmbeddingFunction } from "./embedding_function";
-
-// We need to explicitly export '*' so that the `register` decorator actually registers the class.
-export * from "./openai";
-export * from "./registry";
-
-/**
- * Create a schema with embedding functions.
- *
- * @param fields
- * @returns Schema
- * @example
- * ```ts
- * class MyEmbeddingFunction extends EmbeddingFunction {
- * // ...
- * }
- * const func = new MyEmbeddingFunction();
- * const schema = LanceSchema({
- *   id: new Int32(),
- *   text: func.sourceField(new Utf8()),
- *   vector: func.vectorField(),
- *   // optional: specify the datatype and/or dimensions
- *   vector2: func.vectorField({ datatype: new Float32(), dims: 3}),
- * });
- *
- * const table = await db.createTable("my_table", data, { schema });
- * ```
- */
-export function LanceSchema(
-  fields: Record<string, [object, Map<string, EmbeddingFunction>] | object>,
-): Schema {
-  const arrowFields: Field[] = [];
-
-  const embeddingFunctions = new Map<
-    EmbeddingFunction,
-    Partial<EmbeddingFunctionConfig>
-  >();
-  Object.entries(fields).forEach(([key, value]) => {
-    if (isDataType(value)) {
-      arrowFields.push(new Field(key, sanitizeType(value), true));
-    } else {
-      const [dtype, metadata] = value as [
-        object,
-        Map<string, EmbeddingFunction>,
-      ];
-      arrowFields.push(new Field(key, sanitizeType(dtype), true));
-      parseEmbeddingFunctions(embeddingFunctions, key, metadata);
-    }
-  });
-  const registry = getRegistry();
-  const metadata = registry.getTableMetadata(
-    Array.from(embeddingFunctions.values()) as EmbeddingFunctionConfig[],
-  );
-  const schema = new Schema(arrowFields, metadata);
-  return schema;
-}
-
-function parseEmbeddingFunctions(
-  embeddingFunctions: Map<EmbeddingFunction, Partial<EmbeddingFunctionConfig>>,
-  key: string,
-  metadata: Map<string, EmbeddingFunction>,
-): void {
-  if (metadata.has("source_column_for")) {
-    const embedFunction = metadata.get("source_column_for")!;
-    const current = embeddingFunctions.get(embedFunction);
-    if (current !== undefined) {
-      embeddingFunctions.set(embedFunction, {
-        ...current,
-        sourceColumn: key,
-      });
-    } else {
-      embeddingFunctions.set(embedFunction, {
-        sourceColumn: key,
-        function: embedFunction,
-      });
-    }
-  } else if (metadata.has("vector_column_for")) {
-    const embedFunction = metadata.get("vector_column_for")!;
-
-    const current = embeddingFunctions.get(embedFunction);
-    if (current !== undefined) {
-      embeddingFunctions.set(embedFunction, {
-        ...current,
-        vectorColumn: key,
-      });
-    } else {
-      embeddingFunctions.set(embedFunction, {
-        vectorColumn: key,
-        function: embedFunction,
-      });
-    }
-  }
-}
+export { EmbeddingFunction, isEmbeddingFunction } from "./embedding_function";
+export { OpenAIEmbeddingFunction } from "./openai";
--- a/nodejs/lancedb/embedding/openai.ts
+++ b/nodejs/lancedb/embedding/openai.ts
@@ -13,31 +13,17 @@
 // limitations under the License.

 import type OpenAI from "openai";
-import { Float, Float32 } from "../arrow";
-import { EmbeddingFunction } from "./embedding_function";
-import { register } from "./registry";
+import { type EmbeddingFunction } from "./embedding_function";

-export type OpenAIOptions = {
-  apiKey?: string;
-  model?: string;
-};
-
-@register("openai")
-export class OpenAIEmbeddingFunction extends EmbeddingFunction<
-  string,
-  OpenAIOptions
-> {
-  #openai: OpenAI;
-  #modelName: string;
-
-  constructor(options: OpenAIOptions = { model: "text-embedding-ada-002" }) {
-    super();
-    const openAIKey = options?.apiKey ?? process.env.OPENAI_API_KEY;
-    if (!openAIKey) {
-      throw new Error("OpenAI API key is required");
-    }
-    const modelName = options?.model ?? "text-embedding-ada-002";
+export class OpenAIEmbeddingFunction implements EmbeddingFunction<string> {
+  private readonly _openai: OpenAI;
+  private readonly _modelName: string;

+  constructor(
+    sourceColumn: string,
+    openAIKey: string,
+    modelName: string = "text-embedding-ada-002",
+  ) {
    /**
     * @type {import("openai").default}
     */
@@ -50,40 +36,18 @@ export class OpenAIEmbeddingFunction extends EmbeddingFunction<
      throw new Error("please install openai@^4.24.1 using npm install openai");
    }

+    this.sourceColumn = sourceColumn;
    const configuration = {
      apiKey: openAIKey,
    };

-    this.#openai = new Openai(configuration);
-    this.#modelName = modelName;
+    this._openai = new Openai(configuration);
+    this._modelName = modelName;
  }

-  toJSON() {
-    return {
-      model: this.#modelName,
-    };
-  }
-
-  ndims(): number {
-    switch (this.#modelName) {
-      case "text-embedding-ada-002":
-        return 1536;
-      case "text-embedding-3-large":
-        return 3072;
-      case "text-embedding-3-small":
-        return 1536;
-      default:
-        return null as never;
-    }
-  }
-
-  embeddingDataType(): Float {
-    return new Float32();
-  }
-
-  async computeSourceEmbeddings(data: string[]): Promise<number[][]> {
-    const response = await this.#openai.embeddings.create({
-      model: this.#modelName,
+  async embed(data: string[]): Promise<number[][]> {
+    const response = await this._openai.embeddings.create({
+      model: this._modelName,
      input: data,
    });

@@ -94,15 +58,5 @@ export class OpenAIEmbeddingFunction extends EmbeddingFunction<
    return embeddings;
  }

-  async computeQueryEmbeddings(data: string): Promise<number[]> {
-    if (typeof data !== "string") {
-      throw new Error("Data must be a string");
-    }
-    const response = await this.#openai.embeddings.create({
-      model: this.#modelName,
-      input: data,
-    });
-
-    return response.data[0].embedding;
-  }
+  sourceColumn: string;
 }
--- a/nodejs/lancedb/embedding/registry.ts
+++ b/nodejs/lancedb/embedding/registry.ts
@@ -1,172 +0,0 @@
-// Copyright 2024 Lance Developers.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-import type { EmbeddingFunction } from "./embedding_function";
-import "reflect-metadata";
-
-export interface EmbeddingFunctionOptions {
-  [key: string]: unknown;
-}
-
-export interface EmbeddingFunctionFactory<
-  T extends EmbeddingFunction = EmbeddingFunction,
-> {
-  new (modelOptions?: EmbeddingFunctionOptions): T;
-}
-
-interface EmbeddingFunctionCreate<T extends EmbeddingFunction> {
-  create(options?: EmbeddingFunctionOptions): T;
-}
-
-/**
- * This is a singleton class used to register embedding functions
- * and fetch them by name. It also handles serializing and deserializing.
- * You can implement your own embedding function by subclassing EmbeddingFunction
- * or TextEmbeddingFunction and registering it with the registry
- */
-export class EmbeddingFunctionRegistry {
-  #functions: Map<string, EmbeddingFunctionFactory> = new Map();
-
-  /**
-   * Register an embedding function
-   * @param name The name of the function
-   * @param func The function to register
-   */
-  register<T extends EmbeddingFunctionFactory = EmbeddingFunctionFactory>(
-    this: EmbeddingFunctionRegistry,
-    alias?: string,
-    // biome-ignore lint/suspicious/noExplicitAny: <explanation>
-  ): (ctor: T) => any {
-    const self = this;
-    return function (ctor: T) {
-      if (!alias) {
-        alias = ctor.name;
-      }
-      if (self.#functions.has(alias)) {
-        throw new Error(
-          `Embedding function with alias "${alias}" already exists`,
-        );
-      }
-      self.#functions.set(alias, ctor);
-      Reflect.defineMetadata("lancedb::embedding::name", alias, ctor);
-      return ctor;
-    };
-  }
-
-  /**
-   * Fetch an embedding function by name
-   * @param name The name of the function
-   */
-  get<T extends EmbeddingFunction<unknown> = EmbeddingFunction>(
-    name: string,
-  ): EmbeddingFunctionCreate<T> | undefined {
-    const factory = this.#functions.get(name);
-    if (!factory) {
-      return undefined;
-    }
-    return {
-      create: function (options: EmbeddingFunctionOptions) {
-        return new factory(options) as unknown as T;
-      },
-    };
-  }
-
-  /**
-   * reset the registry to the initial state
-   */
-  reset(this: EmbeddingFunctionRegistry) {
-    this.#functions.clear();
-  }
-
-  parseFunctions(
-    this: EmbeddingFunctionRegistry,
-    metadata: Map<string, string>,
-  ): Map<string, EmbeddingFunctionConfig> {
-    if (!metadata.has("embedding_functions")) {
-      return new Map();
-    } else {
-      type FunctionConfig = {
-        name: string;
-        sourceColumn: string;
-        vectorColumn: string;
-        model: EmbeddingFunctionOptions;
-      };
-      const functions = <FunctionConfig[]>(
-        JSON.parse(metadata.get("embedding_functions")!)
-      );
-      return new Map(
-        functions.map((f) => {
-          const fn = this.get(f.name);
-          if (!fn) {
-            throw new Error(`Function "${f.name}" not found in registry`);
-          }
-          return [
-            f.name,
-            {
-              sourceColumn: f.sourceColumn,
-              vectorColumn: f.vectorColumn,
-              function: this.get(f.name)!.create(f.model),
-            },
-          ];
-        }),
-      );
-    }
-  }
-  // biome-ignore lint/suspicious/noExplicitAny: <explanation>
-  functionToMetadata(conf: EmbeddingFunctionConfig): Record<string, any> {
-    // biome-ignore lint/suspicious/noExplicitAny: <explanation>
-    const metadata: Record<string, any> = {};
-    const name = Reflect.getMetadata(
-      "lancedb::embedding::name",
-      conf.function.constructor,
-    );
-    metadata["sourceColumn"] = conf.sourceColumn;
-    metadata["vectorColumn"] = conf.vectorColumn ?? "vector";
-    metadata["name"] = name ?? conf.function.constructor.name;
-    metadata["model"] = conf.function.toJSON();
-    return metadata;
-  }
-
-  getTableMetadata(functions: EmbeddingFunctionConfig[]): Map<string, string> {
-    const metadata = new Map<string, string>();
-    const jsonData = functions.map((conf) => this.functionToMetadata(conf));
-    metadata.set("embedding_functions", JSON.stringify(jsonData));
-
-    return metadata;
-  }
-}
-
-const _REGISTRY = new EmbeddingFunctionRegistry();
-
-export function register(name?: string) {
-  return _REGISTRY.register(name);
-}
-
-/**
- * Utility function to get the global instance of the registry
- * @returns `EmbeddingFunctionRegistry` The global instance of the registry
- * @example
- * ```ts
- * const registry = getRegistry();
- * const openai = registry.get("openai").create();
- */
-export function getRegistry(): EmbeddingFunctionRegistry {
-  return _REGISTRY;
-}
-
-export interface EmbeddingFunctionConfig {
-  sourceColumn: string;
-  vectorColumn?: string;
-  function: EmbeddingFunction;
-}
--- a/nodejs/lancedb/query.ts
+++ b/nodejs/lancedb/query.ts
@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-import { Table as ArrowTable, RecordBatch, tableFromIPC } from "./arrow";
+import { Table as ArrowTable, RecordBatch, tableFromIPC } from "apache-arrow";
 import { type IvfPqOptions } from "./indices";
 import {
  RecordBatchIterator as NativeBatchIterator,
@@ -170,7 +170,6 @@ export class QueryBase<
  /** Collect the results as an array of objects. */
  async toArray(): Promise<unknown[]> {
    const tbl = await this.toArrow();
-
    // eslint-disable-next-line @typescript-eslint/no-unsafe-return
    return tbl.toArray();
  }
--- a/nodejs/lancedb/sanitize.ts
+++ b/nodejs/lancedb/sanitize.ts
@@ -20,7 +20,6 @@
 // comes from the exact same library instance.  This is not always the case
 // and so we must sanitize the input to ensure that it is compatible.

-import type { IntBitWidth, TKeys, TimeBitWidth } from "apache-arrow/type";
 import {
  Binary,
  Bool,
@@ -76,9 +75,10 @@ import {
  Uint64,
  Union,
  Utf8,
-} from "./arrow";
+} from "apache-arrow";
+import type { IntBitWidth, TKeys, TimeBitWidth } from "apache-arrow/type";

-export function sanitizeMetadata(
+function sanitizeMetadata(
  metadataLike?: unknown,
 ): Map<string, string> | undefined {
  if (metadataLike === undefined || metadataLike === null) {
@@ -97,7 +97,7 @@ export function sanitizeMetadata(
  return metadataLike as Map<string, string>;
 }

-export function sanitizeInt(typeLike: object) {
+function sanitizeInt(typeLike: object) {
  if (
    !("bitWidth" in typeLike) ||
    typeof typeLike.bitWidth !== "number" ||
@@ -111,14 +111,14 @@ export function sanitizeInt(typeLike: object) {
  return new Int(typeLike.isSigned, typeLike.bitWidth as IntBitWidth);
 }

-export function sanitizeFloat(typeLike: object) {
+function sanitizeFloat(typeLike: object) {
  if (!("precision" in typeLike) || typeof typeLike.precision !== "number") {
    throw Error("Expected a Float Type to have a `precision` property");
  }
  return new Float(typeLike.precision as Precision);
 }

-export function sanitizeDecimal(typeLike: object) {
+function sanitizeDecimal(typeLike: object) {
  if (
    !("scale" in typeLike) ||
    typeof typeLike.scale !== "number" ||
@@ -134,14 +134,14 @@ export function sanitizeDecimal(typeLike: object) {
  return new Decimal(typeLike.scale, typeLike.precision, typeLike.bitWidth);
 }

-export function sanitizeDate(typeLike: object) {
+function sanitizeDate(typeLike: object) {
  if (!("unit" in typeLike) || typeof typeLike.unit !== "number") {
    throw Error("Expected a Date type to have a `unit` property");
  }
  return new Date_(typeLike.unit as DateUnit);
 }

-export function sanitizeTime(typeLike: object) {
+function sanitizeTime(typeLike: object) {
  if (
    !("unit" in typeLike) ||
    typeof typeLike.unit !== "number" ||
@@ -155,7 +155,7 @@ export function sanitizeTime(typeLike: object) {
  return new Time(typeLike.unit, typeLike.bitWidth as TimeBitWidth);
 }

-export function sanitizeTimestamp(typeLike: object) {
+function sanitizeTimestamp(typeLike: object) {
  if (!("unit" in typeLike) || typeof typeLike.unit !== "number") {
    throw Error("Expected a Timestamp type to have a `unit` property");
  }
@@ -166,7 +166,7 @@ export function sanitizeTimestamp(typeLike: object) {
  return new Timestamp(typeLike.unit, timezone);
 }

-export function sanitizeTypedTimestamp(
+function sanitizeTypedTimestamp(
  typeLike: object,
  // eslint-disable-next-line @typescript-eslint/naming-convention
  Datatype:
@@ -182,14 +182,14 @@ export function sanitizeTypedTimestamp(
  return new Datatype(timezone);
 }

-export function sanitizeInterval(typeLike: object) {
+function sanitizeInterval(typeLike: object) {
  if (!("unit" in typeLike) || typeof typeLike.unit !== "number") {
    throw Error("Expected an Interval type to have a `unit` property");
  }
  return new Interval(typeLike.unit);
 }

-export function sanitizeList(typeLike: object) {
+function sanitizeList(typeLike: object) {
  if (!("children" in typeLike) || !Array.isArray(typeLike.children)) {
    throw Error(
      "Expected a List type to have an array-like `children` property",
@@ -201,7 +201,7 @@ export function sanitizeList(typeLike: object) {
  return new List(sanitizeField(typeLike.children[0]));
 }

-export function sanitizeStruct(typeLike: object) {
+function sanitizeStruct(typeLike: object) {
  if (!("children" in typeLike) || !Array.isArray(typeLike.children)) {
    throw Error(
      "Expected a Struct type to have an array-like `children` property",
@@ -210,7 +210,7 @@ export function sanitizeStruct(typeLike: object) {
  return new Struct(typeLike.children.map((child) => sanitizeField(child)));
 }

-export function sanitizeUnion(typeLike: object) {
+function sanitizeUnion(typeLike: object) {
  if (
    !("typeIds" in typeLike) ||
    !("mode" in typeLike) ||
@@ -234,7 +234,7 @@ export function sanitizeUnion(typeLike: object) {
  );
 }

-export function sanitizeTypedUnion(
+function sanitizeTypedUnion(
  typeLike: object,
  // eslint-disable-next-line @typescript-eslint/naming-convention
  UnionType: typeof DenseUnion | typeof SparseUnion,
@@ -256,7 +256,7 @@ export function sanitizeTypedUnion(
  );
 }

-export function sanitizeFixedSizeBinary(typeLike: object) {
+function sanitizeFixedSizeBinary(typeLike: object) {
  if (!("byteWidth" in typeLike) || typeof typeLike.byteWidth !== "number") {
    throw Error(
      "Expected a FixedSizeBinary type to have a `byteWidth` property",
@@ -265,7 +265,7 @@ export function sanitizeFixedSizeBinary(typeLike: object) {
  return new FixedSizeBinary(typeLike.byteWidth);
 }

-export function sanitizeFixedSizeList(typeLike: object) {
+function sanitizeFixedSizeList(typeLike: object) {
  if (!("listSize" in typeLike) || typeof typeLike.listSize !== "number") {
    throw Error("Expected a FixedSizeList type to have a `listSize` property");
  }
@@ -283,7 +283,7 @@ export function sanitizeFixedSizeList(typeLike: object) {
  );
 }

-export function sanitizeMap(typeLike: object) {
+function sanitizeMap(typeLike: object) {
  if (!("children" in typeLike) || !Array.isArray(typeLike.children)) {
    throw Error(
      "Expected a Map type to have an array-like `children` property",
@@ -300,14 +300,14 @@ export function sanitizeMap(typeLike: object) {
  );
 }

-export function sanitizeDuration(typeLike: object) {
+function sanitizeDuration(typeLike: object) {
  if (!("unit" in typeLike) || typeof typeLike.unit !== "number") {
    throw Error("Expected a Duration type to have a `unit` property");
  }
  return new Duration(typeLike.unit);
 }

-export function sanitizeDictionary(typeLike: object) {
+function sanitizeDictionary(typeLike: object) {
  if (!("id" in typeLike) || typeof typeLike.id !== "number") {
    throw Error("Expected a Dictionary type to have an `id` property");
  }
@@ -329,7 +329,7 @@ export function sanitizeDictionary(typeLike: object) {
 }

 // biome-ignore lint/suspicious/noExplicitAny: skip
-export function sanitizeType(typeLike: unknown): DataType<any> {
+function sanitizeType(typeLike: unknown): DataType<any> {
  if (typeof typeLike !== "object" || typeLike === null) {
    throw Error("Expected a Type but object was null/undefined");
  }
@@ -449,7 +449,7 @@ export function sanitizeType(typeLike: unknown): DataType<any> {
  }
 }

-export function sanitizeField(fieldLike: unknown): Field {
+function sanitizeField(fieldLike: unknown): Field {
  if (fieldLike instanceof Field) {
    return fieldLike;
  }
--- a/nodejs/lancedb/table.ts
+++ b/nodejs/lancedb/table.ts
@@ -12,15 +12,13 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-import { Data, Schema, fromDataToBuffer, tableFromIPC } from "./arrow";
-
-import { getRegistry } from "./embedding/registry";
+import { Schema, tableFromIPC } from "apache-arrow";
+import { Data, fromDataToBuffer } from "./arrow";
 import { IndexOptions } from "./indices";
 import {
  AddColumnsSql,
  ColumnAlteration,
  IndexConfig,
-  OptimizeStats,
  Table as _NativeTable,
 } from "./native";
 import { Query, VectorQuery } from "./query";
@@ -52,23 +50,6 @@ export interface UpdateOptions {
  where: string;
 }

-export interface OptimizeOptions {
-  /**
-   * If set then all versions older than the given date
-   * be removed.  The current version will never be removed.
-   * The default is 7 days
-   * @example
-   * // Delete all versions older than 1 day
-   * const olderThan = new Date();
-   * olderThan.setDate(olderThan.getDate() - 1));
-   * tbl.cleanupOlderVersions(olderThan);
-   *
-   * // Delete all versions except the current version
-   * tbl.cleanupOlderVersions(new Date());
-   */
-  cleanupOlderThan: Date;
-}
-
 /**
 * A Table is a collection of Records in a LanceDB Database.
 *
@@ -123,14 +104,8 @@ export class Table {
   */
  async add(data: Data, options?: Partial<AddDataOptions>): Promise<void> {
    const mode = options?.mode ?? "append";
-    const schema = await this.schema();
-    const registry = getRegistry();
-    const functions = registry.parseFunctions(schema.metadata);

-    const buffer = await fromDataToBuffer(
-      data,
-      functions.values().next().value,
-    );
+    const buffer = await fromDataToBuffer(data);
    await this.inner.add(buffer, mode);
  }

@@ -377,48 +352,6 @@ export class Table {
    await this.inner.restore();
  }

-  /**
-   * Optimize the on-disk data and indices for better performance.
-   *
-   * Modeled after ``VACUUM`` in PostgreSQL.
-   *
-   *  Optimization covers three operations:
-   *
-   *  - Compaction: Merges small files into larger ones
-   *  - Prune: Removes old versions of the dataset
-   *  - Index: Optimizes the indices, adding new data to existing indices
-   *
-   *
-   *  Experimental API
-   *  ----------------
-   *
-   *  The optimization process is undergoing active development and may change.
-   *  Our goal with these changes is to improve the performance of optimization and
-   *  reduce the complexity.
-   *
-   *  That being said, it is essential today to run optimize if you want the best
-   *  performance.  It should be stable and safe to use in production, but it our
-   *  hope that the API may be simplified (or not even need to be called) in the
-   *  future.
-   *
-   *  The frequency an application shoudl call optimize is based on the frequency of
-   *  data modifications.  If data is frequently added, deleted, or updated then
-   *  optimize should be run frequently.  A good rule of thumb is to run optimize if
-   *  you have added or modified 100,000 or more records or run more than 20 data
-   *  modification operations.
-   */
-  async optimize(options?: Partial<OptimizeOptions>): Promise<OptimizeStats> {
-    let cleanupOlderThanMs;
-    if (
-      options?.cleanupOlderThan !== undefined &&
-      options?.cleanupOlderThan !== null
-    ) {
-      cleanupOlderThanMs =
-        new Date().getTime() - options.cleanupOlderThan.getTime();
-    }
-    return await this.inner.optimize(cleanupOlderThanMs);
-  }
-
  /** List all indices that have been created with {@link Table.createIndex} */
  async listIndices(): Promise<IndexConfig[]> {
    return await this.inner.listIndices();
--- a/nodejs/npm/darwin-arm64/package.json
+++ b/nodejs/npm/darwin-arm64/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-darwin-arm64",
-	"version": "0.5.0",
+	"version": "0.4.20",
 	"os": ["darwin"],
 	"cpu": ["arm64"],
 	"main": "lancedb.darwin-arm64.node",
--- a/nodejs/npm/darwin-x64/package.json
+++ b/nodejs/npm/darwin-x64/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-darwin-x64",
-	"version": "0.5.0",
+	"version": "0.4.20",
 	"os": ["darwin"],
 	"cpu": ["x64"],
 	"main": "lancedb.darwin-x64.node",
--- a/nodejs/npm/linux-arm64-gnu/package.json
+++ b/nodejs/npm/linux-arm64-gnu/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-linux-arm64-gnu",
-	"version": "0.5.0",
+	"version": "0.4.20",
 	"os": ["linux"],
 	"cpu": ["arm64"],
 	"main": "lancedb.linux-arm64-gnu.node",
--- a/nodejs/npm/linux-x64-gnu/package.json
+++ b/nodejs/npm/linux-x64-gnu/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-linux-x64-gnu",
-	"version": "0.5.0",
+	"version": "0.4.20",
 	"os": ["linux"],
 	"cpu": ["x64"],
 	"main": "lancedb.linux-x64-gnu.node",
--- a/nodejs/npm/win32-x64-msvc/package.json
+++ b/nodejs/npm/win32-x64-msvc/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-win32-x64-msvc",
-	"version": "0.5.0",
+	"version": "0.4.20",
 	"os": ["win32"],
 	"cpu": ["x64"],
 	"main": "lancedb.win32-x64-msvc.node",
--- a/nodejs/package-lock.json
+++ b/nodejs/package-lock.json
--- a/nodejs/package.json
+++ b/nodejs/package.json
@@ -1,12 +1,8 @@
 {
  "name": "@lancedb/lancedb",
-  "version": "0.5.0",
-  "main": "dist/index.js",
-  "exports": {
-    ".": "./dist/index.js",
-    "./embedding": "./dist/embedding/index.js"
-  },
-  "types": "dist/index.d.ts",
+  "version": "0.4.20",
+  "main": "./dist/index.js",
+  "types": "./dist/index.d.ts",
  "napi": {
    "name": "lancedb",
    "triples": {
@@ -66,7 +62,6 @@
  },
  "dependencies": {
    "apache-arrow": "^15.0.0",
-    "openai": "^4.29.2",
-    "reflect-metadata": "^0.2.2"
+    "openai": "^4.29.2"
  }
 }
--- a/nodejs/src/table.rs
+++ b/nodejs/src/table.rs
@@ -15,8 +15,8 @@
 use arrow_ipc::writer::FileWriter;
 use lancedb::ipc::ipc_file_to_batches;
 use lancedb::table::{
-    AddDataMode, ColumnAlteration as LanceColumnAlteration, Duration, NewColumnTransform,
-    OptimizeAction, OptimizeOptions, Table as LanceDbTable,
+    AddDataMode, ColumnAlteration as LanceColumnAlteration, NewColumnTransform,
+    Table as LanceDbTable,
 };
 use napi::bindgen_prelude::*;
 use napi_derive::napi;
@@ -263,60 +263,6 @@ impl Table {
        self.inner_ref()?.restore().await.default_error()
    }

-    #[napi]
-    pub async fn optimize(&self, older_than_ms: Option<i64>) -> napi::Result<OptimizeStats> {
-        let inner = self.inner_ref()?;
-
-        let older_than = if let Some(ms) = older_than_ms {
-            if ms == i64::MIN {
-                return Err(napi::Error::from_reason(format!(
-                    "older_than_ms can not be {}",
-                    i32::MIN,
-                )));
-            }
-            Duration::try_milliseconds(ms)
-        } else {
-            None
-        };
-
-        let compaction_stats = inner
-            .optimize(OptimizeAction::Compact {
-                options: lancedb::table::CompactionOptions::default(),
-                remap_options: None,
-            })
-            .await
-            .default_error()?
-            .compaction
-            .unwrap();
-        let prune_stats = inner
-            .optimize(OptimizeAction::Prune {
-                older_than,
-                delete_unverified: None,
-            })
-            .await
-            .default_error()?
-            .prune
-            .unwrap();
-        inner
-            .optimize(lancedb::table::OptimizeAction::Index(
-                OptimizeOptions::default(),
-            ))
-            .await
-            .default_error()?;
-        Ok(OptimizeStats {
-            compaction: CompactionStats {
-                files_added: compaction_stats.files_added as i64,
-                files_removed: compaction_stats.files_removed as i64,
-                fragments_added: compaction_stats.fragments_added as i64,
-                fragments_removed: compaction_stats.fragments_removed as i64,
-            },
-            prune: RemovalStats {
-                bytes_removed: prune_stats.bytes_removed as i64,
-                old_versions_removed: prune_stats.old_versions as i64,
-            },
-        })
-    }
-
    #[napi]
    pub async fn list_indices(&self) -> napi::Result<Vec<IndexConfig>> {
        Ok(self
@@ -352,40 +298,6 @@ impl From<lancedb::index::IndexConfig> for IndexConfig {
    }
 }

-/// Statistics about a compaction operation.
-#[napi(object)]
-#[derive(Clone, Debug)]
-pub struct CompactionStats {
-    /// The number of fragments removed
-    pub fragments_removed: i64,
-    /// The number of new, compacted fragments added
-    pub fragments_added: i64,
-    /// The number of data files removed
-    pub files_removed: i64,
-    /// The number of new, compacted data files added
-    pub files_added: i64,
-}
-
-/// Statistics about a cleanup operation
-#[napi(object)]
-#[derive(Clone, Debug)]
-pub struct RemovalStats {
-    /// The number of bytes removed
-    pub bytes_removed: i64,
-    /// The number of old versions removed
-    pub old_versions_removed: i64,
-}
-
-/// Statistics about an optimize operation
-#[napi(object)]
-#[derive(Clone, Debug)]
-pub struct OptimizeStats {
-    /// Statistics about the compaction operation
-    pub compaction: CompactionStats,
-    /// Statistics about the removal operation
-    pub prune: RemovalStats,
-}
-
 ///  A definition of a column alteration. The alteration changes the column at
 /// `path` to have the new name `name`, to be nullable if `nullable` is true,
 /// and to have the data type `data_type`. At least one of `rename` or `nullable`
--- a/nodejs/tsconfig.json
+++ b/nodejs/tsconfig.json
@@ -7,9 +7,7 @@
    "outDir": "./dist",
    "strict": true,
    "allowJs": true,
-    "resolveJsonModule": true,
-    "emitDecoratorMetadata": true,
-    "experimentalDecorators": true
+    "resolveJsonModule": true
  },
  "exclude": ["./dist/*"],
  "typedocOptions": {
--- a/python/.bumpversion.cfg
+++ b/python/.bumpversion.cfg
@@ -0,0 +1,8 @@
+[bumpversion]
+current_version = 0.6.13
+commit = True
+message = [python] Bump version: {current_version} → {new_version}
+tag = True
+tag_name = python-v{new_version}
+
+[bumpversion:file:pyproject.toml]
--- a/python/.bumpversion.toml
+++ b/python/.bumpversion.toml
@@ -1,34 +0,0 @@
-[tool.bumpversion]
-current_version = "0.8.1"
-parse = """(?x)
-    (?P<major>0|[1-9]\\d*)\\.
-    (?P<minor>0|[1-9]\\d*)\\.
-    (?P<patch>0|[1-9]\\d*)
-    (?:-(?P<pre_l>[a-zA-Z-]+)\\.(?P<pre_n>0|[1-9]\\d*))?
-"""
-serialize = [
-    "{major}.{minor}.{patch}-{pre_l}.{pre_n}",
-    "{major}.{minor}.{patch}",
-]
-search = "{current_version}"
-replace = "{new_version}"
-regex = false
-ignore_missing_version = false
-ignore_missing_files = false
-tag = true
-sign_tags = false
-tag_name = "python-v{new_version}"
-tag_message = "Bump version: {current_version} → {new_version}"
-allow_dirty = true
-commit = true
-message = "Bump version: {current_version} → {new_version}"
-commit_args = ""
-
-[tool.bumpversion.parts.pre_l]
-values = ["beta", "final"]
-optional_value = "final"
-
-[[tool.bumpversion.files]]
-filename = "Cargo.toml"
-search = "\nversion = \"{current_version}\""
-replace = "\nversion = \"{new_version}\""
--- a/python/Cargo.toml
+++ b/python/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "lancedb-python"
-version = "0.8.1"
+version = "0.4.10"
 edition.workspace = true
 description = "Python bindings for LanceDB"
 license.workspace = true
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -1,16 +1,16 @@
 [project]
 name = "lancedb"
-# version in Cargo.toml
+version = "0.6.13"
 dependencies = [
    "deprecation",
-    "pylance==0.11.1",
+    "pylance==0.10.12",
    "ratelimiter~=1.0",
    "requests>=2.31.0",
    "retry>=0.9.2",
    "tqdm>=4.27.0",
    "pydantic>=1.10",
    "attrs>=21.3.0",
-    "packaging",
+    "semver",
    "cachetools",
    "overrides>=0.7",
 ]
--- a/python/python/lancedb/_lancedb.pyi
+++ b/python/python/lancedb/_lancedb.pyi
@@ -86,17 +86,3 @@ class VectorQuery:
    def refine_factor(self, refine_factor: int): ...
    def nprobes(self, nprobes: int): ...
    def bypass_vector_index(self): ...
-
-class CompactionStats:
-    fragments_removed: int
-    fragments_added: int
-    files_removed: int
-    files_added: int
-
-class RemovalStats:
-    bytes_removed: int
-    old_versions_removed: int
-
-class OptimizeStats:
-    compaction: CompactionStats
-    prune: RemovalStats
--- a/python/python/lancedb/db.py
+++ b/python/python/lancedb/db.py
@@ -509,7 +509,7 @@ class AsyncConnection(object):
        return self._inner.__repr__()

    def __enter__(self):
-        return self
+        return self  # `with ... as db` binds whatever __enter__ returns; a bare `self` would bind None

    def __exit__(self, *_):
        self.close()
@@ -779,7 +779,7 @@ class AsyncConnection(object):
        name: str,
        storage_options: Optional[Dict[str, str]] = None,
        index_cache_size: Optional[int] = None,
-    ) -> AsyncTable:
+    ) -> Table:
        """Open a Lance Table in the database.

        Parameters
--- a/python/python/lancedb/embeddings/bedrock.py
+++ b/python/python/lancedb/embeddings/bedrock.py
@@ -74,7 +74,7 @@ class BedRockText(TextEmbeddingFunction):
    profile_name: Union[str, None] = None
    role_session_name: str = "lancedb-embeddings"

-    if PYDANTIC_VERSION.major < 2:  # Pydantic 1.x compat
+    if PYDANTIC_VERSION < (2, 0):  # Pydantic 1.x compat

        class Config:
            keep_untouched = (cached_property,)
--- a/python/python/lancedb/embeddings/gemini_text.py
+++ b/python/python/lancedb/embeddings/gemini_text.py
@@ -90,7 +90,7 @@ class GeminiText(TextEmbeddingFunction):
    query_task_type: str = "retrieval_query"
    source_task_type: str = "retrieval_document"

-    if PYDANTIC_VERSION.major < 2:  # Pydantic 1.x compat
+    if PYDANTIC_VERSION < (2, 0):  # Pydantic 1.x compat

        class Config:
            keep_untouched = (cached_property,)
--- a/python/python/lancedb/embeddings/imagebind.py
+++ b/python/python/lancedb/embeddings/imagebind.py
@@ -40,7 +40,7 @@ class ImageBindEmbeddings(EmbeddingFunction):
    device: str = "cpu"
    normalize: bool = False

-    if PYDANTIC_VERSION.major < 2:  # Pydantic 1.x compat
+    if PYDANTIC_VERSION < (2, 0):  # Pydantic 1.x compat

        class Config:
            keep_untouched = (cached_property,)
--- a/python/python/lancedb/embeddings/transformers.py
+++ b/python/python/lancedb/embeddings/transformers.py
@@ -54,7 +54,7 @@ class TransformersEmbeddingFunction(EmbeddingFunction):
        self._tokenizer = transformers.AutoTokenizer.from_pretrained(self.name)
        self._model = transformers.AutoModel.from_pretrained(self.name)

-    if PYDANTIC_VERSION.major < 2:  # Pydantic 1.x compat
+    if PYDANTIC_VERSION < (2, 0):  # Pydantic 1.x compat

        class Config:
            keep_untouched = (cached_property,)
--- a/python/python/lancedb/pydantic.py
+++ b/python/python/lancedb/pydantic.py
@@ -35,13 +35,13 @@ from typing import (
 import numpy as np
 import pyarrow as pa
 import pydantic
-from packaging.version import Version
+import semver

-PYDANTIC_VERSION = Version(pydantic.__version__)
+PYDANTIC_VERSION = semver.parse_version_info(pydantic.__version__)
 try:
    from pydantic_core import CoreSchema, core_schema
 except ImportError:
-    if PYDANTIC_VERSION.major >= 2:
+    if PYDANTIC_VERSION >= (2,):
        raise

 if TYPE_CHECKING:
@@ -144,7 +144,7 @@ def Vector(
                raise TypeError("A list of numbers or numpy.ndarray is needed")
            return cls(v)

-        if PYDANTIC_VERSION.major < 2:
+        if PYDANTIC_VERSION < (2, 0):

            @classmethod
            def __modify_schema__(cls, field_schema: Dict[str, Any]):
--- a/python/python/lancedb/rerankers/cohere.py
+++ b/python/python/lancedb/rerankers/cohere.py
@@ -1,5 +1,5 @@
 import os
-from packaging.version import Version
+import semver
 from functools import cached_property
 from typing import Union

@@ -44,8 +44,9 @@ class CohereReranker(Reranker):
    def _client(self):
        cohere = attempt_import_or_raise("cohere")
        # ensure version is at least 0.5.0
-        if hasattr(cohere, "__version__") and Version(cohere.__version__) < Version(
-            "0.5.0"
+        if (
+            hasattr(cohere, "__version__")
+            and semver.compare(cohere.__version__, "0.5.0") < 0  # minimum is 0.5.0, matching the error message below
        ):
            raise ValueError(
                f"cohere version must be at least 0.5.0, found {cohere.__version__}"
--- a/python/python/lancedb/table.py
+++ b/python/python/lancedb/table.py
@@ -58,7 +58,7 @@ if TYPE_CHECKING:
    import PIL
    from lance.dataset import CleanupStats, ReaderLike

-    from ._lancedb import Table as LanceDBTable, OptimizeStats
+    from ._lancedb import Table as LanceDBTable
    from .db import LanceDBConnection
    from .index import BTree, IndexConfig, IvfPq

@@ -2377,49 +2377,6 @@ class AsyncTable:
        """
        await self._inner.restore()

-    async def optimize(
-        self, *, cleanup_older_than: Optional[timedelta] = None
-    ) -> OptimizeStats:
-        """
-        Optimize the on-disk data and indices for better performance.
-
-        Modeled after ``VACUUM`` in PostgreSQL.
-
-        Optimization covers three operations:
-
-         * Compaction: Merges small files into larger ones
-         * Prune: Removes old versions of the dataset
-         * Index: Optimizes the indices, adding new data to existing indices
-
-        Parameters
-        ----------
-        cleanup_older_than: timedelta, optional default 7 days
-            All files belonging to versions older than this will be removed.  Set
-            to 0 days to remove all versions except the latest.  The latest version
-            is never removed.
-
-        Experimental API
-        ----------------
-
-        The optimization process is undergoing active development and may change.
-        Our goal with these changes is to improve the performance of optimization and
-        reduce the complexity.
-
-        That being said, it is essential today to run optimize if you want the best
-        performance.  It should be stable and safe to use in production, but it our
-        hope that the API may be simplified (or not even need to be called) in the
-        future.
-
-        The frequency an application shoudl call optimize is based on the frequency of
-        data modifications.  If data is frequently added, deleted, or updated then
-        optimize should be run frequently.  A good rule of thumb is to run optimize if
-        you have added or modified 100,000 or more records or run more than 20 data
-        modification operations.
-        """
-        if cleanup_older_than is not None:
-            cleanup_older_than = round(cleanup_older_than.total_seconds() * 1000)
-        return await self._inner.optimize(cleanup_older_than)
-
    async def list_indices(self) -> IndexConfig:
        """
        List all indices that have been created with Self::create_index
--- a/python/python/tests/test_db.py
+++ b/python/python/tests/test_db.py
@@ -296,13 +296,6 @@ async def test_close(tmp_path):
        await db.table_names()


-@pytest.mark.asyncio
-async def test_context_manager(tmp_path):
-    with await lancedb.connect_async(tmp_path) as db:
-        assert db.is_open()
-    assert not db.is_open()
-
-
@pytest.mark.asyncio
 async def test_create_mode_async(tmp_path):
    db = await lancedb.connect_async(tmp_path)
--- a/python/python/tests/test_pydantic.py
+++ b/python/python/tests/test_pydantic.py
@@ -178,7 +178,7 @@ def test_fixed_size_list_field():
        li: List[int]

    data = TestModel(vec=list(range(16)), li=[1, 2, 3])
-    if PYDANTIC_VERSION.major >= 2:
+    if PYDANTIC_VERSION >= (2,):
        assert json.loads(data.model_dump_json()) == {
            "vec": list(range(16)),
            "li": [1, 2, 3],
@@ -197,7 +197,7 @@ def test_fixed_size_list_field():
        ]
    )

-    if PYDANTIC_VERSION.major >= 2:
+    if PYDANTIC_VERSION >= (2,):
        json_schema = TestModel.model_json_schema()
    else:
        json_schema = TestModel.schema()
--- a/python/python/tests/test_table.py
+++ b/python/python/tests/test_table.py
@@ -1025,29 +1025,3 @@ async def test_time_travel(db_async: AsyncConnection):
    # Can't use restore if not checked out
    with pytest.raises(ValueError, match="checkout before running restore"):
        await table.restore()
-
-
-@pytest.mark.asyncio
-async def test_optimize(db_async: AsyncConnection):
-    table = await db_async.create_table(
-        "test",
-        data=[{"x": [1]}],
-    )
-    await table.add(
-        data=[
-            {"x": [2]},
-        ],
-    )
-    stats = await table.optimize()
-    assert stats.compaction.files_removed == 2
-    assert stats.compaction.files_added == 1
-    assert stats.compaction.fragments_added == 1
-    assert stats.compaction.fragments_removed == 2
-    assert stats.prune.bytes_removed == 0
-    assert stats.prune.old_versions_removed == 0
-
-    stats = await table.optimize(cleanup_older_than=timedelta(seconds=0))
-    assert stats.prune.bytes_removed > 0
-    assert stats.prune.old_versions_removed == 3
-
-    assert await table.query().to_arrow() == pa.table({"x": [[1], [2]]})
--- a/python/src/table.rs
+++ b/python/src/table.rs
@@ -2,9 +2,7 @@ use arrow::{
    ffi_stream::ArrowArrayStreamReader,
    pyarrow::{FromPyArrow, ToPyArrow},
 };
-use lancedb::table::{
-    AddDataMode, Duration, OptimizeAction, OptimizeOptions, Table as LanceDbTable,
-};
+use lancedb::table::{AddDataMode, Table as LanceDbTable};
 use pyo3::{
    exceptions::{PyRuntimeError, PyValueError},
    pyclass, pymethods,
@@ -19,40 +17,6 @@ use crate::{
    query::Query,
 };

-/// Statistics about a compaction operation.
-#[pyclass(get_all)]
-#[derive(Clone, Debug)]
-pub struct CompactionStats {
-    /// The number of fragments removed
-    pub fragments_removed: u64,
-    /// The number of new, compacted fragments added
-    pub fragments_added: u64,
-    /// The number of data files removed
-    pub files_removed: u64,
-    /// The number of new, compacted data files added
-    pub files_added: u64,
-}
-
-/// Statistics about a cleanup operation
-#[pyclass(get_all)]
-#[derive(Clone, Debug)]
-pub struct RemovalStats {
-    /// The number of bytes removed
-    pub bytes_removed: u64,
-    /// The number of old versions removed
-    pub old_versions_removed: u64,
-}
-
-/// Statistics about an optimize operation
-#[pyclass(get_all)]
-#[derive(Clone, Debug)]
-pub struct OptimizeStats {
-    /// Statistics about the compaction operation
-    pub compaction: CompactionStats,
-    /// Statistics about the removal operation
-    pub prune: RemovalStats,
-}
-
 #[pyclass]
 pub struct Table {
    // We keep a copy of the name to use if the inner table is dropped
@@ -227,58 +191,4 @@ impl Table {
    pub fn query(&self) -> Query {
        Query::new(self.inner_ref().unwrap().query())
    }
-
-    pub fn optimize(self_: PyRef<'_, Self>, cleanup_since_ms: Option<u64>) -> PyResult<&PyAny> {
-        let inner = self_.inner_ref()?.clone();
-        let older_than = if let Some(ms) = cleanup_since_ms {
-            if ms > i64::MAX as u64 {
-                return Err(PyValueError::new_err(format!(
-                    "cleanup_since_ms must be between {} and -{}",
-                    i32::MAX,
-                    i32::MAX
-                )));
-            }
-            Duration::try_milliseconds(ms as i64)
-        } else {
-            None
-        };
-        future_into_py(self_.py(), async move {
-            let compaction_stats = inner
-                .optimize(OptimizeAction::Compact {
-                    options: lancedb::table::CompactionOptions::default(),
-                    remap_options: None,
-                })
-                .await
-                .infer_error()?
-                .compaction
-                .unwrap();
-            let prune_stats = inner
-                .optimize(OptimizeAction::Prune {
-                    older_than,
-                    delete_unverified: None,
-                })
-                .await
-                .infer_error()?
-                .prune
-                .unwrap();
-            inner
-                .optimize(lancedb::table::OptimizeAction::Index(
-                    OptimizeOptions::default(),
-                ))
-                .await
-                .infer_error()?;
-            Ok(OptimizeStats {
-                compaction: CompactionStats {
-                    files_added: compaction_stats.files_added as u64,
-                    files_removed: compaction_stats.files_removed as u64,
-                    fragments_added: compaction_stats.fragments_added as u64,
-                    fragments_removed: compaction_stats.fragments_removed as u64,
-                },
-                prune: RemovalStats {
-                    bytes_removed: prune_stats.bytes_removed,
-                    old_versions_removed: prune_stats.old_versions,
-                },
-            })
-        })
-    }
 }
--- a/release_process.md
+++ b/release_process.md
@@ -8,51 +8,6 @@ The Python package is versioned and released separately from the Rust and Node.j
 ones. For Rust and Node.js, the release process is shared between `lancedb` and
 `vectordb` for now.

-## Preview releases
-
-LanceDB has full releases about every 2 weeks, but in between we make frequent
-preview releases. These are released as `0.x.y.betaN` versions. They receive the
-same level of testing as normal releases and let you get access to the latest
-features. However, we do not guarantee that preview releases will be available
-more than 6 months after they are released. We may delete the preview releases
-from the packaging index after a while. Once your application is stable, we
-recommend switching to full releases, which will never be removed from package
-indexes.
-
-## Making releases
-
-The release process uses a handful of GitHub actions to automate the process.
-
-```text
-  ┌─────────────────────┐                                                
-  │Create Release Commit│                                                
-  └─┬───────────────────┘                                                
-    │                           ┌────────────┐ ┌──►Python GH Release     
-    ├──►(tag) python-vX.Y.Z ───►│PyPI Publish├─┤                         
-    │                           └────────────┘ └──►Python Wheels         
-    │                                                                    
-    │                           ┌───────────┐                            
-    └──►(tag) vX.Y.Z ───┬──────►│NPM Publish├──┬──►Rust/Node GH Release  
-                        │       └───────────┘  │                         
-                        │                      └──►NPM Packages          
-                        │       ┌─────────────┐                          
-                        └──────►│Cargo Publish├───►Cargo Release         
-                                └─────────────┘                          
-```
-
-To start a release, trigger a `Create Release Commit` action from
-[the workflows page](https://github.com/lancedb/lancedb/actions/workflows/make-release-commit.yml)
-(Click on "Run workflow").
-
-* **For a preview release**, leave the default parameters.
-* **For a stable release**, set the `release_type` input to `stable`.
-
-> [!IMPORTANT]
-> If there was a breaking change since the last stable release, and we haven't
-> done so yet, we should increment the minor version. The CI will detect if this
-> is needed and fail the `Create Release Commit` job. To fix, select the
-> "bump minor version" option.
-
 ## Breaking changes

 We try to avoid breaking changes, but sometimes they are necessary. When there
@@ -66,10 +21,12 @@ body of the PR. A CI job will add a `breaking-change` label to the PR, which is
 what will ultimately be used to CI to determine if the minor version should be
 incremented.

-> [!IMPORTANT]
-> Reviewers should check that PRs with breaking changes receive the `breaking-change`
-> label. If a PR is missing the label, please add it, even if after it was merged.
-> This label is used in the release process.
+A CI job will validate that if a `breaking-change` label is added, the minor
+version is incremented in the `Cargo.toml` and `pyproject.toml` files. The only
+exception is if it has already been incremented since the last stable release.
+
+**It is the responsibility of the PR author to increment the minor version when
+appropriate.**

 Some things that are considered breaking changes:

--- a/rust/ffi/node/Cargo.toml
+++ b/rust/ffi/node/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "lancedb-node"
-version = "0.5.0"
+version = "0.4.20"
 description = "Serverless, low-latency vector database for AI applications"
 license.workspace = true
 edition.workspace = true
--- a/rust/ffi/node/src/error.rs
+++ b/rust/ffi/node/src/error.rs
@@ -19,12 +19,10 @@ use snafu::Snafu;

 #[derive(Debug, Snafu)]
 pub enum Error {
-    #[allow(dead_code)]
    #[snafu(display("column '{name}' is missing"))]
    MissingColumn { name: String },
    #[snafu(display("{name}: {message}"))]
    OutOfRange { name: String, message: String },
-    #[allow(dead_code)]
    #[snafu(display("{index_type} is not a valid index type"))]
    InvalidIndexType { index_type: String },

--- a/rust/ffi/node/src/neon_ext/js_object_ext.rs
+++ b/rust/ffi/node/src/neon_ext/js_object_ext.rs
@@ -19,7 +19,6 @@ use neon::prelude::*;
 pub trait JsObjectExt {
    fn get_opt_u32(&self, cx: &mut FunctionContext, key: &str) -> Result<Option<u32>>;
    fn get_usize(&self, cx: &mut FunctionContext, key: &str) -> Result<usize>;
-    #[allow(dead_code)]
    fn get_opt_usize(&self, cx: &mut FunctionContext, key: &str) -> Result<Option<usize>>;
 }

--- a/rust/ffi/node/src/table.rs
+++ b/rust/ffi/node/src/table.rs
@@ -324,7 +324,7 @@ impl JsTable {
        rt.spawn(async move {
            let stats = table
                .optimize(OptimizeAction::Prune {
-                    older_than: Some(older_than),
+                    older_than,
                    delete_unverified,
                })
                .await;
--- a/rust/lancedb/Cargo.toml
+++ b/rust/lancedb/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "lancedb"
-version = "0.5.0"
+version = "0.4.20"
 edition.workspace = true
 description = "LanceDB: A serverless, low-latency vector database for AI applications"
 license.workspace = true
@@ -38,11 +38,10 @@ url.workspace = true
 regex.workspace = true
 serde = { version = "^1" }
 serde_json = { version = "1" }
-serde_with = { version = "3.8.1" }
 # For remote feature
 reqwest = { version = "0.11.24", features = ["gzip", "json"], optional = true }
-polars-arrow = { version = ">=0.37,<0.40.0", optional = true }
-polars = { version = ">=0.37,<0.40.0", optional = true}
+polars-arrow = { version = ">=0.37", optional = true }
+polars = { version = ">=0.37", optional = true}

 [dev-dependencies]
 tempfile = "3.5.0"
@@ -50,12 +49,9 @@ rand = { version = "0.8.3", features = ["small_rng"] }
 uuid = { version = "1.7.0", features = ["v4"] }
 walkdir = "2"
 # For s3 integration tests (dev deps aren't allowed to be optional atm)
-# We pin these because the content-length check breaks with localstack
-# https://github.com/smithy-lang/smithy-rs/releases/tag/release-2024-05-21
-aws-sdk-s3 = { version = "=1.23.0" }
-aws-sdk-kms = { version = "=1.21.0" }
+aws-sdk-s3 = { version = "1.0" }
+aws-sdk-kms = { version = "1.0" }
 aws-config = { version = "1.0" }
-aws-smithy-runtime = { version = "=1.3.0" }

 [features]
 default = []
--- a/Show More
+++ b/Show More
				`@@ -1 +0,0 @@`
				`$d51afd07-e3cd-4c76-9b9b-787e13fd55b0<62>=id <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>int3208name <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>string08`
				`@@ -1 +0,0 @@`
				`$15648e72-076f-4ef1-8b90-10d305b95b3b<33>=id <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>int3208name <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>string08`
				`@@ -1 +0,0 @@`
				`$a3689caf-4f6b-4afc-a3c7-97af75661843<34>oitem <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>string8price <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>double80vector <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>*fixed_size_list:float:28`