python v0.1.4

Support S3 and GCS from typescript SDK (#106 )
add embedding functions to the nodejs client (#95 )
2025-12-23 05:19:58 +00:00 · 2023-05-31 20:11:25 -07:00 · 2023-05-30 21:32:17 -07:00 · 2023-05-26 18:09:20 -07:00 · 2023-05-25 17:57:26 -06:00 · 2023-05-25 17:01:52 -06:00
42 changed files with 1364 additions and 706 deletions
--- a/.github/workflows/node.yml
+++ b/.github/workflows/node.yml
@@ -67,10 +67,8 @@ jobs:
    - name: Build
      run: |
        npm ci
-        npm run tsc
        npm run build
-        npm run pack-build
-        npm install --no-save ./dist/vectordb-*.tgz
+        npm run tsc
    - name: Test
      run: npm run test
  macos:
@@ -96,10 +94,8 @@ jobs:
    - name: Build
      run: |
        npm ci
-        npm run tsc
        npm run build
-        npm run pack-build
-        npm install --no-save ./dist/vectordb-*.tgz
+        npm run tsc
    - name: Test
      run: |
        npm run test
--- a/.github/workflows/python.yml
+++ b/.github/workflows/python.yml
@@ -31,6 +31,7 @@ jobs:
    - name: Install lancedb
      run: |
        pip install -e .
+        pip install tantivy@git+https://github.com/quickwit-oss/tantivy-py#164adc87e1a033117001cf70e38c82a53014d985
        pip install pytest
    - name: Run tests
      run: pytest -x -v --durations=30 tests
@@ -49,10 +50,11 @@ jobs:
    - name: Set up Python
      uses: actions/setup-python@v4
      with:
-        python-version: "3.10"
+        python-version: "3.11"
    - name: Install lancedb
      run: |
        pip install -e .
+        pip install tantivy@git+https://github.com/quickwit-oss/tantivy-py#164adc87e1a033117001cf70e38c82a53014d985
        pip install pytest
    - name: Run tests
      run: pytest -x -v --durations=30 tests
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -1,181 +0,0 @@
-name: Prepare Release
-
-# Based on https://github.com/dherman/neon-prebuild-example/blob/eaa4d33d682e5eb7abbc3da7aed153a1b1acb1b3/.github/workflows/publish.yml
-
-on:
-  push:
-    tags:
-      - v*
-
-jobs:
-  draft-release:
-    runs-on: ubuntu-latest
-    steps:
-      - uses: softprops/action-gh-release@v1
-        with:
-          draft: true
-          prerelease: true # hardcoded on for now
-          generate_release_notes: true
-  
-  rust:
-    runs-on: ubuntu-latest
-    needs: draft-release
-    defaults:
-      run:
-        shell: bash
-        working-directory: rust/vectordb
-    steps:
-      - uses: actions/checkout@v3
-        with:
-          fetch-depth: 0
-          lfs: true
-      - name: Install dependencies
-        run: |
-          sudo apt update
-          sudo apt install -y protobuf-compiler libssl-dev
-      - name: Package Rust
-        run: cargo package --all-features
-      - uses: softprops/action-gh-release@v1
-        with:
-          draft: true
-          files: target/vectordb-*.crate
-    
-  python:
-    runs-on: ubuntu-latest
-    needs: draft-release
-    defaults:
-      run:
-        shell: bash
-        working-directory: python
-    steps:
-      - uses: actions/checkout@v3
-        with:
-          fetch-depth: 0
-          lfs: true
-      - name: Set up Python
-        uses: actions/setup-python@v4
-        with:
-          python-version: "3.10"
-      - name: Build wheel
-        run: |
-          pip install wheel
-          python setup.py sdist bdist_wheel
-      - uses: softprops/action-gh-release@v1
-        with:
-          draft: true
-          files: |
-            python/dist/lancedb-*.tar.gz 
-            python/dist/lancedb-*.whl
-
-  node:
-    runs-on: ubuntu-latest
-    needs: draft-release
-    defaults:
-      run:
-        shell: bash
-        working-directory: node
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v2
-      - uses: actions/setup-node@v3
-        with:
-          node-version: 20
-          cache: 'npm'
-          cache-dependency-path: node/package-lock.json
-      - name: Install dependencies
-        run: |
-          sudo apt update
-          sudo apt install -y protobuf-compiler libssl-dev
-      - name: Build
-        run: |
-          npm ci
-          npm run tsc
-          npm pack
-      - uses: softprops/action-gh-release@v1
-        with:
-          draft: true
-          files: node/vectordb-*.tgz
-
-  node-macos:
-    runs-on: macos-12
-    needs: draft-release
-    strategy:
-      fail-fast: false
-      matrix:
-        target: [x86_64-apple-darwin, aarch64-apple-darwin]
-    steps:
-    - name: Checkout
-      uses: actions/checkout@v2
-    - name: Install system dependencies
-      run: brew install protobuf
-    - name: Install npm dependencies
-      run: |
-        cd node
-        npm ci
-    - name: Build MacOS native node modules
-      run: bash ci/build_macos_artifacts.sh ${{ matrix.target }}
-    - uses: softprops/action-gh-release@v1
-      with:
-        draft: true
-        files: node/dist/vectordb-darwin*.tgz
-
-  node-linux:
-    runs-on: ubuntu-latest
-    needs: draft-release
-    strategy:
-      fail-fast: false
-      matrix:
-        target:
-          - x86_64-unknown-linux-gnu
-          - aarch64-unknown-linux-gnu
-          - aarch64-unknown-linux-musl
-          - x86_64-unknown-linux-musl
-    steps:
-    - name: Checkout
-      uses: actions/checkout@v2
-    - name: Setup Rust
-      uses: ATiltedTree/setup-rust@v1
-      with:
-        rust-version: stable
-    - name: Install system dependencies
-      run: |
-        sudo apt update
-        sudo apt install -y protobuf-compiler libssl-dev
-        cargo install cross
-    - name: Install npm dependencies
-      run: |
-        cd node
-        npm ci
-    - name: Build Linux native node modules
-      run: bash ci/build_linux_artifacts.sh ${{ matrix.target }}
-    - uses: softprops/action-gh-release@v1
-      with:
-        draft: true
-        files: node/dist/vectordb-linux*.tgz
-
-  release:
-    needs: [python, node, node-macos, node-linux, rust]
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/download-artifact@v3
-      - name: Publish to PyPI
-        env:
-          TWINE_USERNAME: __token__
-          TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }}
-        run: |
-          python -m twine upload --non-interactive \
-            --skip-existing \
-            --repository testpypi python/dist/*
-      - name: Publish to NPM
-        run: |
-          for filename in node/dist/*.tgz; do
-            npm publish --dry-run $filename
-          done
-      - name: Publish to crates.io
-        env:
-          CARGO_REGISTRY_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
-        run: |
-          cargo publish --dry-run --no-verify rust/target/vectordb-*.crate
-      # - uses: softprops/action-gh-release@v1
-      #   with:
-      #     draft: false
--- a/.gitignore
+++ b/.gitignore
@@ -4,8 +4,6 @@
 **/__pycache__
 .DS_Store

-.vscode
-
 rust/target
 rust/Cargo.lock

--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1052,6 +1052,7 @@ dependencies = [
 "paste",
 "petgraph",
 "rand",
+ "regex",
 "uuid",
 ]

@@ -1645,9 +1646,9 @@ dependencies = [

 [[package]]
 name = "lance"
-version = "0.4.12"
+version = "0.4.17"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fc96cf89139af6f439a0e28ccd04ddf81be795b79fda3105b7a8952fadeb778e"
+checksum = "86dda8185bd1ffae7b910c1f68035af23be9b717c52e9cc4de176cd30b47f772"
 dependencies = [
 "accelerate-src",
 "arrow",
@@ -1684,6 +1685,7 @@ dependencies = [
 "rand",
 "reqwest",
 "shellexpand",
+ "snafu",
 "sqlparser-lance",
 "tokio",
 "url",
@@ -3356,18 +3358,21 @@ checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426"

 [[package]]
 name = "vectordb"
-version = "0.1.2"
+version = "0.0.1"
 dependencies = [
 "arrow-array",
+ "arrow-data",
 "arrow-schema",
 "lance",
+ "object_store",
+ "rand",
 "tempfile",
 "tokio",
 ]

 [[package]]
 name = "vectordb-node"
-version = "0.1.2"
+version = "0.1.0"
 dependencies = [
 "arrow-array",
 "arrow-ipc",
--- a/Cross.toml
+++ b/Cross.toml
@@ -1,36 +0,0 @@
-# These make sure our builds are compatible with old glibc versions.
-[target.x86_64-unknown-linux-gnu]
-pre-build = [
-    # Install newer gfortran
-    "yum install -y centos-release-scl",
-    "yum install -y openssl-devel unzip devtoolset-11-gcc-gfortran",
-    # protobuf is too old, so we directly download binaries
-    "PB_REL=https://github.com/protocolbuffers/protobuf/releases",
-    "PB_VERSION=23.1",
-    "curl -LO $PB_REL/download/v$PB_VERSION/protoc-$PB_VERSION-linux-x86_64.zip",
-    "unzip protoc-$PB_VERSION-linux-x86_64.zip -d /usr/local",
-]
-image = "ghcr.io/cross-rs/x86_64-unknown-linux-gnu:main-centos"
-
-[target.aarch64-unknown-linux-gnu]
-pre-build = [
-    "yum install -y openssl-devel unzip",
-    # protobuf is too old, so we directly download binaries
-    "PB_REL=https://github.com/protocolbuffers/protobuf/releases",
-    "PB_VERSION=23.1",
-    "curl -LO $PB_REL/download/v$PB_VERSION/protoc-$PB_VERSION-linux-x86_64.zip",
-    "unzip protoc-$PB_VERSION-linux-x86_64.zip -d /usr/local",
-]
-image = "ghcr.io/cross-rs/aarch64-unknown-linux-gnu:main-centos"
-
-[target.x86_64-unknown-linux-musl]
-pre-build = [
-    "dpkg --add-architecture $CROSS_DEB_ARCH",
-    "apt-get update && apt-get install --assume-yes libssl-dev:$CROSS_DEB_ARCH",
-]
-
-[target.aarch64-unknown-linux-musl]
-pre-build = [
-    "dpkg --add-architecture $CROSS_DEB_ARCH",
-    "apt-get update && apt-get install --assume-yes libssl-dev:$CROSS_DEB_ARCH",
-]
--- a/ci/build_linux_artifacts.sh
+++ b/ci/build_linux_artifacts.sh
@@ -1,40 +0,0 @@
-# Builds the Linux artifacts (node binaries).
-# Usage: ./build_linux_artifacts.sh [target]
-# Targets supported: 
-# - x86_64-unknown-linux-gnu:centos
-# - aarch64-unknown-linux-gnu:centos
-# - aarch64-unknown-linux-musl
-# - x86_64-unknown-linux-musl
-
-# On MacOS, need to run in a linux container:
-# docker run -v $(pwd):/io -w /io 
-
-# Must run rustup toolchain install stable-x86_64-unknown-linux-gnu --force-non-host
-
-set -e
-
-build_node_binaries() {
-    pushd node
-    
-    for target in $1
-    do
-        echo "Building node library for $target"
-        # cross doesn't yet pass this down to Docker, so we do it ourselves.
-        export CROSS_CONTAINER_OPTS="--platform linux/amd64"
-        if [[ $target == *musl ]]; then
-            # This is needed for cargo to allow build cdylibs with musl
-            RUSTFLAGS="-C target-feature=-crt-static"
-        fi
-        npm run cross-release -- --target $target
-        npm run pack-build -- --target $target
-    done
-    popd
-}
-
-if [ -n "$1" ]; then
-    targets=$1
-else
-    # targets="x86_64-unknown-linux-gnu aarch64-unknown-linux-gnu aarch64-unknown-linux-musl x86_64-unknown-linux-musl"
-    targets="aarch64-unknown-linux-gnu"
-fi
-build_node_binaries $targets
--- a/ci/build_macos_artifacts.sh
+++ b/ci/build_macos_artifacts.sh
@@ -1,22 +0,0 @@
-# Builds the macOS artifacts (node binaries).
-# Usage: ./build_macos_artifacts.sh [target]
-# Targets supported: x86_64-apple-darwin aarch64-apple-darwin
-
-build_node_binaries() {
-    pushd node
-    
-    for target in $1
-    do
-        echo "Building node library for $target"
-        npm run build-release -- --target $target
-        npm run pack-build -- --target $target
-    done
-    popd
-}
-
-if [ -n "$1" ]; then
-    targets=$1
-else
-    targets="x86_64-apple-darwin aarch64-apple-darwin"
-fi
-build_node_binaries $targets
--- a/ci/ubuntu_build.dockerfile
+++ b/ci/ubuntu_build.dockerfile
@@ -1,31 +0,0 @@
-# On MacOS, need to run in a linux container:
-# cat ci/ubuntu_build.dockerfile | docker build -t lancedb-node-build -
-# docker run -v /var/run/docker.sock:/var/run/docker.sock -v $(pwd):/io -w /io lancedb-node-build bash ci/build_linux_artifacts.sh
-FROM ubuntu:20.04
-
-ARG DEBIAN_FRONTEND=noninteractive
-ENV TZ=Europe/Moscow
-
-RUN apt update && apt install -y protobuf-compiler libssl-dev build-essential curl \
-    software-properties-common npm docker.io
-
-# Install rust
-RUN curl https://sh.rustup.rs -sSf | bash -s -- -y
-
-ENV PATH="/root/.cargo/bin:${PATH}"
-
-# Install cross
-# https://github.com/cross-rs/cross/issues/1257#issuecomment-1544553706
-RUN cargo install cross --git https://github.com/cross-rs/cross
-
-# Install additional build targets
-RUN rustup target add x86_64-unknown-linux-gnu aarch64-unknown-linux-gnu aarch64-unknown-linux-musl x86_64-unknown-linux-musl
-
-# Install node
-RUN npm install npm@latest -g && \
-    npm install n -g && \
-    n latest
-
-# set CROSS_CONTAINER_IN_CONTAINER to inform `cross` that it is executed from within a container
-ENV CROSS_CONTAINER_IN_CONTAINER=true
-ENV CROSS_CONTAINER_ENGINE_NO_BUILDKIT=1
--- a/docs/mkdocs.yml
+++ b/docs/mkdocs.yml
@@ -19,6 +19,7 @@ nav:
 - Basics: basic.md
 - Embeddings: embedding.md
 - Indexing: ann_indexes.md
+- Full-text search: fts.md
 - Integrations: integrations.md
 - Python API: python.md

--- a/docs/src/fts.md
+++ b/docs/src/fts.md
@@ -0,0 +1,51 @@
+# [EXPERIMENTAL] Full text search
+
+LanceDB now provides experimental support for full text search.
+This is currently Python only. We plan to push the integration down to Rust in the future
+to make this available for JS as well.
+
+## Installation
+
+To use full text search, you must install optional dependency tantivy-py:
+
+# tantivy 0.19.2
+pip install tantivy@git+https://github.com/quickwit-oss/tantivy-py#164adc87e1a033117001cf70e38c82a53014d985
+
+
+## Quickstart
+
+Assume:
+1. `table` is a LanceDB Table
+2. `text` is the name of the Table column that we want to index
+
+To create the index:
+
+```python
+table.create_fts_index("text")
+```
+
+To search:
+
+```python
+df = table.search("puppy").limit(10).select(["text"]).to_df()
+```
+
+LanceDB automatically looks for an FTS index if the input is str.
+
+## Multiple text columns
+
+If you have multiple columns to index, pass them all as a list to `create_fts_index`:
+
+```python
+table.create_fts_index(["text1", "text2"])
+```
+
+Note that the search API call does not change - you can search over all indexed columns at once.
+
+## Current limitations
+
+1. Currently we do not yet support incremental writes.
+If you add data after fts index creation, it won't be reflected
+in search results until you do a full reindex.
+
+2. We currently only support local filesystem paths for the fts index.
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -45,5 +45,6 @@ We will be adding completed demo apps built using LanceDB.
 * [`Basic Operations`](basic.md) - basic functionality of LanceDB.
 * [`Embedding Functions`](embedding.md) - functions for working with embeddings.
 * [`Indexing`](ann_indexes.md) - create vector indexes to speed up queries.
+* [`Full text search`](fts.md) - [EXPERIMENTAL] full-text search API
 * [`Ecosystem Integrations`](integrations.md) - integrating LanceDB with python data tooling ecosystem.
 * [`API Reference`](python.md) - detailed documentation for the LanceDB Python SDK.
--- a/node/.npmignore
+++ b/node/.npmignore
@@ -1,2 +0,0 @@
-gen_test_data.py
-index.node
--- a/node/README.md
+++ b/node/README.md
@@ -8,9 +8,6 @@ A JavaScript / Node.js library for [LanceDB](https://github.com/lancedb/lancedb)
 npm install vectordb
 ```

-This will download the appropriate native library for your platform. We currently
-support x86_64 Linux, Intel MacOS, and ARM (M1/M2) MacOS.
-
 ## Usage

 ### Basic Example
@@ -27,19 +24,6 @@ The [examples](./examples) folder contains complete examples.

 ## Development

-Build and install the rust library with:
-
-```bash
-npm run build
-npm run pack-build
-npm install --no-save ./dist/vectordb-*.tgz
-```
-
-`npm run build` builds the Rust library, `npm run pack-build` packages the Rust
-binary into an npm module called `@vectordb/<platform>` (for example, 
-`@vectordb/darwin-arm64.node`), and then `npm run install ...` installs that
-module.
-
 The LanceDB javascript is built with npm:

 ```bash
--- a/node/native.js
+++ b/node/native.js
@@ -12,20 +12,29 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-const { currentTarget } = require('@neon-rs/load');
-
 let nativeLib;

-try {
-    nativeLib = require(`@vectordb/${currentTarget()}`);
-} catch (e) {
-    throw new Error(`vectordb: failed to load native library.
-  You may need to run \`npm install @vectordb/${currentTarget()}\`.
-
-  If that does not work, please file a bug report at https://github.com/lancedb/lancedb/issues
-
-  Source error: ${e}`);
+function getPlatformLibrary() {
+    if (process.platform === "darwin" && process.arch == "arm64") {
+        return require('./aarch64-apple-darwin.node');
+    } else if (process.platform === "darwin" && process.arch == "x64") {
+        return require('./x86_64-apple-darwin.node');
+    } else if (process.platform === "linux" && process.arch == "x64") {
+        return require('./x86_64-unknown-linux-gnu.node');
+    } else {
+        throw new Error(`vectordb: unsupported platform ${process.platform}_${process.arch}. Please file a bug report at https://github.com/lancedb/lancedb/issues`)
+    }
 }

-// Dynamic require for runtime.
-module.exports = nativeLib;
+try {
+    nativeLib = require('./index.node')
+} catch (e) {
+    if (e.code === "MODULE_NOT_FOUND") {
+        nativeLib = getPlatformLibrary();
+    } else {
+        throw new Error('vectordb: failed to load native library. Please file a bug report at https://github.com/lancedb/lancedb/issues');
+    }
+}
+
+module.exports = nativeLib
+
--- a/node/package-lock.json
+++ b/node/package-lock.json
@@ -7,26 +7,12 @@
    "": {
      "name": "vectordb",
      "version": "0.1.1",
-      "cpu": [
-        "x64",
-        "arm64"
-      ],
      "license": "Apache-2.0",
-      "os": [
-        "darwin",
-        "linux"
-      ],
      "dependencies": {
        "@apache-arrow/ts": "^12.0.0",
-        "@neon-rs/load": "^0.0.74",
-        "@vectordb/darwin-arm64": "0.1.1",
-        "@vectordb/darwin-x64": "0.1.1",
-        "@vectordb/linux-x64-gnu": "0.1.1",
-        "@vectordb/linux-x64-musl": "0.1.1",
        "apache-arrow": "^12.0.0"
      },
      "devDependencies": {
-        "@neon-rs/cli": "^0.0.74",
        "@types/chai": "^4.3.4",
        "@types/mocha": "^10.0.1",
        "@types/node": "^18.16.2",
@@ -44,12 +30,6 @@
        "ts-node": "^10.9.1",
        "ts-node-dev": "^2.0.0",
        "typescript": "*"
-      },
-      "optionalDependencies": {
-        "@vectordb/darwin-arm64": "0.1.1",
-        "@vectordb/darwin-x64": "0.1.1",
-        "@vectordb/linux-x64-gnu": "0.1.1",
-        "@vectordb/linux-x64-musl": "0.1.1"
      }
    },
    "node_modules/@apache-arrow/ts": {
@@ -217,20 +197,6 @@
        "@jridgewell/sourcemap-codec": "^1.4.10"
      }
    },
-    "node_modules/@neon-rs/cli": {
-      "version": "0.0.74",
-      "resolved": "https://registry.npmjs.org/@neon-rs/cli/-/cli-0.0.74.tgz",
-      "integrity": "sha512-9lPmNmjej5iKKOTMPryOMubwkgMRyTWRuaq1yokASvI5mPhr2kzPN7UVjdCOjQvpunNPngR9yAHoirpjiWhUHw==",
-      "dev": true,
-      "bin": {
-        "neon": "index.js"
-      }
-    },
-    "node_modules/@neon-rs/load": {
-      "version": "0.0.74",
-      "resolved": "https://registry.npmjs.org/@neon-rs/load/-/load-0.0.74.tgz",
-      "integrity": "sha512-/cPZD907UNz55yrc/ud4wDgQKtU1TvkD9jeqZWG6J4IMmZkp6zgjkQcKA8UvpkZlcpPHvc8J17sGzLFbP/LUYg=="
-    },
    "node_modules/@nodelib/fs.scandir": {
      "version": "2.1.5",
      "resolved": "https://registry.npmjs.org/@nodelib/fs.scandir/-/fs.scandir-2.1.5.tgz",
@@ -4225,17 +4191,6 @@
        "@jridgewell/sourcemap-codec": "^1.4.10"
      }
    },
-    "@neon-rs/cli": {
-      "version": "0.0.74",
-      "resolved": "https://registry.npmjs.org/@neon-rs/cli/-/cli-0.0.74.tgz",
-      "integrity": "sha512-9lPmNmjej5iKKOTMPryOMubwkgMRyTWRuaq1yokASvI5mPhr2kzPN7UVjdCOjQvpunNPngR9yAHoirpjiWhUHw==",
-      "dev": true
-    },
-    "@neon-rs/load": {
-      "version": "0.0.74",
-      "resolved": "https://registry.npmjs.org/@neon-rs/load/-/load-0.0.74.tgz",
-      "integrity": "sha512-/cPZD907UNz55yrc/ud4wDgQKtU1TvkD9jeqZWG6J4IMmZkp6zgjkQcKA8UvpkZlcpPHvc8J17sGzLFbP/LUYg=="
-    },
    "@nodelib/fs.scandir": {
      "version": "2.1.5",
      "resolved": "https://registry.npmjs.org/@nodelib/fs.scandir/-/fs.scandir-2.1.5.tgz",
--- a/node/package.json
+++ b/node/package.json
@@ -1,17 +1,15 @@
 {
  "name": "vectordb",
-  "version": "0.1.2",
+  "version": "0.1.1",
  "description": " Serverless, low-latency vector database for AI applications",
  "main": "dist/index.js",
  "types": "dist/index.d.ts",
  "scripts": {
    "tsc": "tsc -b",
-    "build": "cargo-cp-artifact --artifact cdylib vectordb-node index.node -- cargo build --message-format=json",
+    "build": "cargo-cp-artifact --artifact cdylib vectordb-node index.node -- cargo build --message-format=json-render-diagnostics",
    "build-release": "npm run build -- --release",
-    "cross-release": "cargo-cp-artifact --artifact cdylib vectordb-node index.node -- cross build --message-format=json --release -p vectordb-node",
    "test": "mocha -recursive dist/test",
-    "lint": "eslint src --ext .js,.ts",
-    "pack-build": "neon pack-build"
+    "lint": "eslint src --ext .js,.ts"
  },
  "repository": {
    "type": "git",
@@ -26,7 +24,6 @@
  "author": "Lance Devs",
  "license": "Apache-2.0",
  "devDependencies": {
-    "@neon-rs/cli": "^0.0.74",
    "@types/chai": "^4.3.4",
    "@types/mocha": "^10.0.1",
    "@types/node": "^18.16.2",
@@ -47,33 +44,6 @@
  },
  "dependencies": {
    "@apache-arrow/ts": "^12.0.0",
-    "@neon-rs/load": "^0.0.74",
    "apache-arrow": "^12.0.0"
-  },
-  "os": [
-    "darwin",
-    "linux"
-  ],
-  "cpu": [
-    "x64",
-    "arm64"
-  ],
-  "neon": {
-    "targets": {
-      "x86_64-apple-darwin": "@vectordb/darwin-x64",
-      "aarch64-apple-darwin": "@vectordb/darwin-arm64",
-      "x86_64-unknown-linux-gnu": "@vectordb/linux-x64-gnu",
-      "x86_64-unknown-linux-musl": "@vectordb/linux-x64-musl",
-      "aarch64-unknown-linux-gnu": "@vectordb/linux-arm64-gnu",
-      "aarch64-unknown-linux-musl": "@vectordb/linux-arm64-musl"
-    }
-  },
-  "optionalDependencies": {
-    "@vectordb/darwin-arm64": "0.1.2",
-    "@vectordb/darwin-x64": "0.1.2",
-    "@vectordb/linux-x64-gnu": "0.1.2",
-    "@vectordb/linux-x64-musl": "0.1.2",
-    "@vectordb/linux-arm64-gnu": "0.1.2",
-    "@vectordb/linux-arm64-musl": "0.1.2"
  }
 }
--- a/node/src/arrow.ts
+++ b/node/src/arrow.ts
@@ -15,15 +15,16 @@
 import {
  Field,
  Float32,
-  List,
+  List, type ListBuilder,
  makeBuilder,
  RecordBatchFileWriter,
-  Table,
+  Table, Utf8,
  type Vector,
  vectorFromArray
 } from 'apache-arrow'
+import { type EmbeddingFunction } from './index'

-export function convertToTable (data: Array<Record<string, unknown>>): Table {
+export function convertToTable<T> (data: Array<Record<string, unknown>>, embeddings?: EmbeddingFunction<T>): Table {
  if (data.length === 0) {
    throw new Error('At least one record needs to be provided')
  }
@@ -33,11 +34,7 @@ export function convertToTable (data: Array<Record<string, unknown>>): Table {

  for (const columnsKey of columns) {
    if (columnsKey === 'vector') {
-      const children = new Field<Float32>('item', new Float32())
-      const list = new List(children)
-      const listBuilder = makeBuilder({
-        type: list
-      })
+      const listBuilder = newVectorListBuilder()
      const vectorSize = (data[0].vector as any[]).length
      for (const datum of data) {
        if ((datum[columnsKey] as any[]).length !== vectorSize) {
@@ -52,15 +49,37 @@ export function convertToTable (data: Array<Record<string, unknown>>): Table {
      for (const datum of data) {
        values.push(datum[columnsKey])
      }
-      records[columnsKey] = vectorFromArray(values)
+
+      if (columnsKey === embeddings?.sourceColumn) {
+        const vectors = embeddings.embed(values as T[])
+        const listBuilder = newVectorListBuilder()
+        vectors.map(v => listBuilder.append(v))
+        records.vector = listBuilder.finish().toVector()
+      }
+
+      if (typeof values[0] === 'string') {
+        // `vectorFromArray` converts strings into dictionary vectors, forcing it back to a string column
+        records[columnsKey] = vectorFromArray(values, new Utf8())
+      } else {
+        records[columnsKey] = vectorFromArray(values)
+      }
    }
  }

  return new Table(records)
 }

-export async function fromRecordsToBuffer (data: Array<Record<string, unknown>>): Promise<Buffer> {
-  const table = convertToTable(data)
+// Creates a new Arrow ListBuilder that stores a Vector column
+function newVectorListBuilder (): ListBuilder<Float32, any> {
+  const children = new Field<Float32>('item', new Float32())
+  const list = new List(children)
+  return makeBuilder({
+    type: list
+  })
+}
+
+export async function fromRecordsToBuffer<T> (data: Array<Record<string, unknown>>, embeddings?: EmbeddingFunction<T>): Promise<Buffer> {
+  const table = convertToTable(data, embeddings)
  const writer = RecordBatchFileWriter.writeAll(table)
  return Buffer.from(await writer.toUint8Array())
 }
--- a/node/src/index.ts
+++ b/node/src/index.ts
@@ -21,14 +21,15 @@ import {
 import { fromRecordsToBuffer } from './arrow'

 // eslint-disable-next-line @typescript-eslint/no-var-requires
-const { databaseNew, databaseTableNames, databaseOpenTable, tableCreate, tableSearch, tableAdd } = require('../native.js')
+const { databaseNew, databaseTableNames, databaseOpenTable, tableCreate, tableSearch, tableAdd, tableCreateVectorIndex } = require('../native.js')

 /**
 * Connect to a LanceDB instance at the given URI
 * @param uri The uri of the database.
 */
 export async function connect (uri: string): Promise<Connection> {
-  return new Connection(uri)
+  const db = await databaseNew(uri)
+  return new Connection(db, uri)
 }

 /**
@@ -38,9 +39,9 @@ export class Connection {
  private readonly _uri: string
  private readonly _db: any

-  constructor (uri: string) {
+  constructor (db: any, uri: string) {
    this._uri = uri
-    this._db = databaseNew(uri)
+    this._db = db
  }

  get uri (): string {
@@ -55,17 +56,50 @@ export class Connection {
  }

  /**
-     * Open a table in the database.
-     * @param name The name of the table.
-     */
-  async openTable (name: string): Promise<Table> {
+   * Open a table in the database.
+   *
+   * @param name The name of the table.
+   */
+  async openTable (name: string): Promise<Table>
+  /**
+   * Open a table in the database.
+   *
+   * @param name The name of the table.
+   * @param embeddings An embedding function to use on this Table
+   */
+  async openTable<T> (name: string, embeddings: EmbeddingFunction<T>): Promise<Table<T>>
+  async openTable<T> (name: string, embeddings?: EmbeddingFunction<T>): Promise<Table<T>> {
    const tbl = await databaseOpenTable.call(this._db, name)
-    return new Table(tbl, name)
+    if (embeddings !== undefined) {
+      return new Table(tbl, name, embeddings)
+    } else {
+      return new Table(tbl, name)
+    }
  }

-  async createTable (name: string, data: Array<Record<string, unknown>>): Promise<Table> {
-    await tableCreate.call(this._db, name, await fromRecordsToBuffer(data))
-    return await this.openTable(name)
+  /**
+   * Creates a new Table and initialize it with new data.
+   *
+   * @param name The name of the table.
+   * @param data Non-empty Array of Records to be inserted into the Table
+   */
+
+  async createTable (name: string, data: Array<Record<string, unknown>>): Promise<Table>
+  /**
+   * Creates a new Table and initialize it with new data.
+   *
+   * @param name The name of the table.
+   * @param data Non-empty Array of Records to be inserted into the Table
+   * @param embeddings An embedding function to use on this Table
+   */
+  async createTable<T> (name: string, data: Array<Record<string, unknown>>, embeddings: EmbeddingFunction<T>): Promise<Table<T>>
+  async createTable<T> (name: string, data: Array<Record<string, unknown>>, embeddings?: EmbeddingFunction<T>): Promise<Table<T>> {
+    const tbl = await tableCreate.call(this._db, name, await fromRecordsToBuffer(data, embeddings))
+    if (embeddings !== undefined) {
+      return new Table(tbl, name, embeddings)
+    } else {
+      return new Table(tbl, name)
+    }
  }

  async createTableArrow (name: string, table: ArrowTable): Promise<Table> {
@@ -75,16 +109,22 @@ export class Connection {
  }
 }

-/**
- * A table in a LanceDB database.
- */
-export class Table {
+export class Table<T = number[]> {
  private readonly _tbl: any
  private readonly _name: string
+  private readonly _embeddings?: EmbeddingFunction<T>

-  constructor (tbl: any, name: string) {
+  constructor (tbl: any, name: string)
+  /**
+   * @param tbl
+   * @param name
+   * @param embeddings An embedding function to use when interacting with this table
+   */
+  constructor (tbl: any, name: string, embeddings: EmbeddingFunction<T>)
+  constructor (tbl: any, name: string, embeddings?: EmbeddingFunction<T>) {
    this._tbl = tbl
    this._name = name
+    this._embeddings = embeddings
  }

  get name (): string {
@@ -92,72 +132,173 @@ export class Table {
  }

  /**
-     * Create a search query to find the nearest neighbors of the given query vector.
-     * @param queryVector The query vector.
-     */
-  search (queryVector: number[]): Query {
+   * Creates a search query to find the nearest neighbors of the given search term
+   * @param query The query search term
+   */
+  search (query: T): Query {
+    let queryVector: number[]
+    if (this._embeddings !== undefined) {
+      queryVector = this._embeddings.embed([query])[0]
+    } else {
+      queryVector = query as number[]
+    }
    return new Query(this._tbl, queryVector)
  }

  /**
-   * Insert records into this Table
-   * @param data Records to be inserted into the Table
+   * Insert records into this Table.
   *
-   * @param mode Append / Overwrite existing records. Default: Append
+   * @param data Records to be inserted into the Table
   * @return The number of rows added to the table
   */
  async add (data: Array<Record<string, unknown>>): Promise<number> {
-    return tableAdd.call(this._tbl, await fromRecordsToBuffer(data), WriteMode.Append.toString())
+    return tableAdd.call(this._tbl, await fromRecordsToBuffer(data, this._embeddings), WriteMode.Append.toString())
  }

+  /**
+   * Insert records into this Table, replacing its contents.
+   *
+   * @param data Records to be inserted into the Table
+   * @return The number of rows added to the table
+   */
  async overwrite (data: Array<Record<string, unknown>>): Promise<number> {
-    return tableAdd.call(this._tbl, await fromRecordsToBuffer(data), WriteMode.Overwrite.toString())
+    return tableAdd.call(this._tbl, await fromRecordsToBuffer(data, this._embeddings), WriteMode.Overwrite.toString())
+  }
+
+  /**
+   * Create an ANN index on this Table vector index.
+   *
+   * @param indexParams The parameters of this Index, @see VectorIndexParams.
+   */
+  async create_index (indexParams: VectorIndexParams): Promise<any> {
+    return tableCreateVectorIndex.call(this._tbl, indexParams)
  }
 }

+interface IvfPQIndexConfig {
+  /**
+   * The column to be indexed
+   */
+  column?: string
+
+  /**
+   * A unique name for the index
+   */
+  index_name?: string
+
+  /**
+   * Metric type, L2 or Cosine
+   */
+  metric_type?: MetricType
+
+  /**
+   * The number of partitions this index
+   */
+  num_partitions?: number
+
+  /**
+   * The max number of iterations for kmeans training.
+   */
+  max_iters?: number
+
+  /**
+   * Train as optimized product quantization.
+   */
+  use_opq?: boolean
+
+  /**
+   * Number of subvectors to build PQ code
+   */
+  num_sub_vectors?: number
+  /**
+   * The number of bits to present one PQ centroid.
+   */
+  num_bits?: number
+
+  /**
+   * Max number of iterations to train OPQ, if `use_opq` is true.
+   */
+  max_opq_iters?: number
+
+  type: 'ivf_pq'
+}
+
+export type VectorIndexParams = IvfPQIndexConfig
+
 /**
 * A builder for nearest neighbor queries for LanceDB.
 */
 export class Query {
  private readonly _tbl: any
-  private readonly _query_vector: number[]
+  private readonly _queryVector: number[]
  private _limit: number
-  private readonly _refine_factor?: number
-  private readonly _nprobes: number
+  private _refineFactor?: number
+  private _nprobes: number
  private readonly _columns?: string[]
  private _filter?: string
-  private readonly _metric = 'L2'
+  private _metricType?: MetricType

  constructor (tbl: any, queryVector: number[]) {
    this._tbl = tbl
-    this._query_vector = queryVector
+    this._queryVector = queryVector
    this._limit = 10
    this._nprobes = 20
-    this._refine_factor = undefined
+    this._refineFactor = undefined
    this._columns = undefined
    this._filter = undefined
+    this._metricType = undefined
  }

+  /***
+   * Sets the number of results that will be returned
+   * @param value number of results
+   */
  limit (value: number): Query {
    this._limit = value
    return this
  }

+  /**
+   * Refine the results by reading extra elements and re-ranking them in memory.
+   * @param value refine factor to use in this query.
+   */
+  refineFactor (value: number): Query {
+    this._refineFactor = value
+    return this
+  }
+
+  /**
+   * The number of probes used. A higher number makes search more accurate but also slower.
+   * @param value The number of probes used.
+   */
+  nprobes (value: number): Query {
+    this._nprobes = value
+    return this
+  }
+
+  /**
+   * A filter statement to be applied to this query.
+   * @param value A filter in the same format used by a sql WHERE clause.
+   */
  filter (value: string): Query {
    this._filter = value
    return this
  }

  /**
-     * Execute the query and return the results as an Array of Objects
-     */
+   * The MetricType used for this Query.
+   * @param value The metric to the. @see MetricType for the different options
+   */
+  metricType (value: MetricType): Query {
+    this._metricType = value
+    return this
+  }
+
+  /**
+   * Execute the query and return the results as an Array of Objects
+   */
  async execute<T = Record<string, unknown>> (): Promise<T[]> {
-    let buffer
-    if (this._filter != null) {
-      buffer = await tableSearch.call(this._tbl, this._query_vector, this._limit, this._filter)
-    } else {
-      buffer = await tableSearch.call(this._tbl, this._query_vector, this._limit)
-    }
+    const buffer = await tableSearch.call(this._tbl, this)
    const data = tableFromIPC(buffer)
    return data.toArray().map((entry: Record<string, unknown>) => {
      const newObject: Record<string, unknown> = {}
@@ -177,3 +318,33 @@ export enum WriteMode {
  Overwrite = 'overwrite',
  Append = 'append'
 }
+
+/**
+ * An embedding function that automatically creates vector representation for a given column.
+ */
+export interface EmbeddingFunction<T> {
+  /**
+   * The name of the column that will be used as input for the Embedding Function.
+   */
+  sourceColumn: string
+
+  /**
+   * Creates a vector representation for the given values.
+   */
+  embed: (data: T[]) => number[][]
+}
+
+/**
+ * Distance metrics type.
+ */
+export enum MetricType {
+  /**
+   * Euclidean distance
+   */
+  L2 = 'l2',
+
+  /**
+   * Cosine distance
+   */
+  Cosine = 'cosine'
+}
--- a/node/src/test/io.ts
+++ b/node/src/test/io.ts
@@ -0,0 +1,52 @@
+// Copyright 2023 Lance Developers.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// IO tests
+
+import { describe } from 'mocha'
+import { assert } from 'chai'
+
+import * as lancedb from '../index'
+
+describe('LanceDB S3 client', function () {
+  if (process.env.TEST_S3_BASE_URL != null) {
+    const baseUri = process.env.TEST_S3_BASE_URL
+    it('should have a valid url', async function () {
+      const uri = `${baseUri}/valid_url`
+      const table = await createTestDB(uri, 2, 20)
+      const con = await lancedb.connect(uri)
+      assert.equal(con.uri, uri)
+
+      const results = await table.search([0.1, 0.3]).limit(5).execute()
+      assert.equal(results.length, 5)
+    })
+  } else {
+    describe.skip('Skip S3 test', function () {})
+  }
+})
+
+async function createTestDB (uri: string, numDimensions: number = 2, numRows: number = 2): Promise<lancedb.Table> {
+  const con = await lancedb.connect(uri)
+
+  const data = []
+  for (let i = 0; i < numRows; i++) {
+    const vector = []
+    for (let j = 0; j < numDimensions; j++) {
+      vector.push(i + (j * 0.1))
+    }
+    data.push({ id: i + 1, name: `name_${i}`, price: i + 10, is_active: (i % 2 === 0), vector })
+  }
+
+  return await con.createTable('vectors', data)
+}
--- a/node/src/test/test.ts
+++ b/node/src/test/test.ts
@@ -17,6 +17,7 @@ import { assert } from 'chai'
 import { track } from 'temp'

 import * as lancedb from '../index'
+import { type EmbeddingFunction, MetricType, Query } from '../index'

 describe('LanceDB client', function () {
  describe('when creating a connection to lancedb', function () {
@@ -67,7 +68,7 @@ describe('LanceDB client', function () {
      const uri = await createTestDB()
      const con = await lancedb.connect(uri)
      const table = await con.openTable('vectors')
-      const results = await table.search([0.1, 0.3]).filter('id == 2').execute()
+      const results = await table.search([0.1, 0.1]).filter('id == 2').execute()
      assert.equal(results.length, 1)
      assert.equal(results[0].id, 2)
    })
@@ -96,8 +97,8 @@ describe('LanceDB client', function () {
      const con = await lancedb.connect(dir)

      const data = [
-        { id: 1, vector: [0.1, 0.2], price: 10 },
-        { id: 2, vector: [1.1, 1.2], price: 50 }
+        { id: 1, vector: [0.1, 0.2], price: 10, name: 'a' },
+        { id: 2, vector: [1.1, 1.2], price: 50, name: 'b' }
      ]

      const table = await con.createTable('vectors', data)
@@ -105,8 +106,8 @@ describe('LanceDB client', function () {
      assert.equal(results.length, 2)

      const dataAdd = [
-        { id: 3, vector: [2.1, 2.2], price: 10 },
-        { id: 4, vector: [3.1, 3.2], price: 50 }
+        { id: 3, vector: [2.1, 2.2], price: 10, name: 'c' },
+        { id: 4, vector: [3.1, 3.2], price: 50, name: 'd' }
      ]
      await table.add(dataAdd)
      const resultsAdd = await table.search([0.1, 0.3]).execute()
@@ -130,16 +131,76 @@ describe('LanceDB client', function () {
      assert.equal(resultsAdd.length, 2)
    })
  })
+
+  describe('when creating a vector index', function () {
+    it('overwrite all records in a table', async function () {
+      const uri = await createTestDB(32, 300)
+      const con = await lancedb.connect(uri)
+      const table = await con.openTable('vectors')
+      await table.create_index({ type: 'ivf_pq', column: 'vector', num_partitions: 2, max_iters: 2 })
+    }).timeout(10_000) // Timeout is high partially because GH macos runner is pretty slow
+  })
+
+  describe('when using a custom embedding function', function () {
+    class TextEmbedding implements EmbeddingFunction<string> {
+      sourceColumn: string
+
+      constructor (targetColumn: string) {
+        this.sourceColumn = targetColumn
+      }
+
+      _embedding_map = new Map<string, number[]>([
+        ['foo', [2.1, 2.2]],
+        ['bar', [3.1, 3.2]]
+      ])
+
+      embed (data: string[]): number[][] {
+        return data.map(datum => this._embedding_map.get(datum) ?? [0.0, 0.0])
+      }
+    }
+
+    it('should encode the original data into embeddings', async function () {
+      const dir = await track().mkdir('lancejs')
+      const con = await lancedb.connect(dir)
+      const embeddings = new TextEmbedding('name')
+
+      const data = [
+        { price: 10, name: 'foo' },
+        { price: 50, name: 'bar' }
+      ]
+      const table = await con.createTable('vectors', data, embeddings)
+      const results = await table.search('foo').execute()
+      assert.equal(results.length, 2)
+    })
+  })
 })

-async function createTestDB (): Promise<string> {
+describe('Query object', function () {
+  it('sets custom parameters', async function () {
+    const query = new Query(undefined, [0.1, 0.3])
+      .limit(1)
+      .metricType(MetricType.Cosine)
+      .refineFactor(100)
+      .nprobes(20) as Record<string, any>
+    assert.equal(query._limit, 1)
+    assert.equal(query._metricType, MetricType.Cosine)
+    assert.equal(query._refineFactor, 100)
+    assert.equal(query._nprobes, 20)
+  })
+})
+
+async function createTestDB (numDimensions: number = 2, numRows: number = 2): Promise<string> {
  const dir = await track().mkdir('lancejs')
  const con = await lancedb.connect(dir)

-  const data = [
-    { id: 1, vector: [0.1, 0.2], name: 'foo', price: 10, is_active: true },
-    { id: 2, vector: [1.1, 1.2], name: 'bar', price: 50, is_active: false }
-  ]
+  const data = []
+  for (let i = 0; i < numRows; i++) {
+    const vector = []
+    for (let j = 0; j < numDimensions; j++) {
+      vector.push(i + (j * 0.1))
+    }
+    data.push({ id: i + 1, name: `name_${i}`, price: i + 10, is_active: (i % 2 === 0), vector })
+  }

  await con.createTable('vectors', data)
  return dir
--- a/python/lancedb/fts.py
+++ b/python/lancedb/fts.py
@@ -0,0 +1,128 @@
+#  Copyright 2023 LanceDB Developers
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+"""Full text search index using tantivy-py"""
+import os
+from typing import List, Tuple
+
+import pyarrow as pa
+
+try:
+    import tantivy
+except ImportError:
+    raise ImportError(
+        "Please install tantivy-py `pip install tantivy@git+https://github.com/quickwit-oss/tantivy-py#164adc87e1a033117001cf70e38c82a53014d985` to use the full text search feature."
+    )
+
+from .table import LanceTable
+
+
+def create_index(index_path: str, text_fields: List[str]) -> tantivy.Index:
+    """
+    Create a new Index (not populated)
+
+    Parameters
+    ----------
+    index_path : str
+        Path to the index directory
+    text_fields : List[str]
+        List of text fields to index
+
+    Returns
+    -------
+    index : tantivy.Index
+        The index object (not yet populated)
+    """
+    # Declaring our schema.
+    schema_builder = tantivy.SchemaBuilder()
+    # special field that we'll populate with row_id
+    schema_builder.add_integer_field("doc_id", stored=True)
+    # data fields
+    for name in text_fields:
+        schema_builder.add_text_field(name, stored=True)
+    schema = schema_builder.build()
+    os.makedirs(index_path, exist_ok=True)
+    index = tantivy.Index(schema, path=index_path)
+    return index
+
+
+def populate_index(index: tantivy.Index, table: LanceTable, fields: List[str]) -> int:
+    """
+    Populate an index with data from a LanceTable
+
+    Parameters
+    ----------
+    index : tantivy.Index
+        The index object
+    table : LanceTable
+        The table to index
+    fields : List[str]
+        List of fields to index
+    """
+    # first check the fields exist and are string or large string type
+    for name in fields:
+        f = table.schema.field(name)  # raises KeyError if not found
+        if not pa.types.is_string(f.type) and not pa.types.is_large_string(f.type):
+            raise TypeError(f"Field {name} is not a string type")
+
+    # create a tantivy writer
+    writer = index.writer()
+    # write data into index
+    dataset = table.to_lance()
+    row_id = 0
+    for b in dataset.to_batches(columns=fields):
+        for i in range(b.num_rows):
+            doc = tantivy.Document()
+            doc.add_integer("doc_id", row_id)
+            for name in fields:
+                doc.add_text(name, b[name][i].as_py())
+            writer.add_document(doc)
+            row_id += 1
+    # commit changes
+    writer.commit()
+    return row_id
+
+
+def search_index(
+    index: tantivy.Index, query: str, limit: int = 10
+) -> Tuple[Tuple[int], Tuple[float]]:
+    """
+    Search an index for a query
+
+    Parameters
+    ----------
+    index : tantivy.Index
+        The index object
+    query : str
+        The query string
+    limit : int
+        The maximum number of results to return
+
+    Returns
+    -------
+    ids_and_score: list[tuple[int], tuple[float]]
+        A tuple of two tuples, the first containing the document ids
+        and the second containing the scores
+    """
+    searcher = index.searcher()
+    query = index.parse_query(query)
+    # get top results
+    results = searcher.search(query, limit)
+    return tuple(
+        zip(
+            *[
+                (searcher.doc(doc_address)["doc_id"][0], score)
+                for score, doc_address in results.hits
+            ]
+        )
+    )
--- a/python/lancedb/query.py
+++ b/python/lancedb/query.py
@@ -14,6 +14,7 @@ from __future__ import annotations

 import numpy as np
 import pandas as pd
+import pyarrow as pa

 from .common import VECTOR_COLUMN_NAME

@@ -131,7 +132,6 @@ class LanceQueryBuilder:
        vector and the returned vector.
        """
        ds = self._table.to_lance()
-        # TODO indexed search
        tbl = ds.to_table(
            columns=self._columns,
            filter=self._where,
@@ -145,3 +145,26 @@ class LanceQueryBuilder:
            },
        )
        return tbl.to_pandas()
+
+
+class LanceFtsQueryBuilder(LanceQueryBuilder):
+    def to_df(self) -> pd.DataFrame:
+        try:
+            import tantivy
+        except ImportError:
+            raise ImportError(
+                "Please install tantivy-py `pip install tantivy@git+https://github.com/quickwit-oss/tantivy-py#164adc87e1a033117001cf70e38c82a53014d985` to use the full text search feature."
+            )
+
+        from .fts import search_index
+
+        # get the index path
+        index_path = self._table._get_fts_index_path()
+        # open the index
+        index = tantivy.Index.open(index_path)
+        # get the scores and doc ids
+        row_ids, scores = search_index(index, self._query, self._limit)
+        scores = pa.array(scores)
+        output_tbl = self._table.to_lance().take(row_ids, columns=self._columns)
+        output_tbl = output_tbl.append_column("score", scores)
+        return output_tbl.to_pandas()
--- a/python/lancedb/table.py
+++ b/python/lancedb/table.py
@@ -14,7 +14,9 @@
 from __future__ import annotations

 import os
+import shutil
 from functools import cached_property
+from typing import List, Union

 import lance
 import numpy as np
@@ -24,7 +26,8 @@ from lance import LanceDataset
 from lance.vector import vec_to_table

 from .common import DATA, VEC, VECTOR_COLUMN_NAME
-from .query import LanceQueryBuilder
+from .query import LanceFtsQueryBuilder, LanceQueryBuilder
+from .util import get_uri_scheme


 def _sanitize_data(data, schema):
@@ -130,6 +133,27 @@ class LanceTable:
        )
        self._reset_dataset()

+    def create_fts_index(self, field_names: Union[str, List[str]]):
+        """Create a full-text search index on the table.
+
+        Warning - this API is highly experimental and is highly likely to change
+        in the future.
+
+        Parameters
+        ----------
+        field_names: str or list of str
+            The name(s) of the field to index.
+        """
+        from .fts import create_index, populate_index
+
+        if isinstance(field_names, str):
+            field_names = [field_names]
+        index = create_index(self._get_fts_index_path(), field_names)
+        populate_index(index, self, field_names)
+
+    def _get_fts_index_path(self):
+        return os.path.join(self._dataset_uri, "_indices", "tantivy")
+
    @cached_property
    def _dataset(self) -> LanceDataset:
        return lance.dataset(self._dataset_uri, version=self._version)
@@ -158,7 +182,7 @@ class LanceTable:
        self._reset_dataset()
        return len(self)

-    def search(self, query: VEC) -> LanceQueryBuilder:
+    def search(self, query: Union[VEC, str]) -> LanceQueryBuilder:
        """Create a search query to find the nearest neighbors
        of the given query vector.

@@ -174,6 +198,10 @@ class LanceTable:
        and also the "score" column which is the distance between the query
        vector and the returned vector.
        """
+        if isinstance(query, str):
+            # fts
+            return LanceFtsQueryBuilder(self, query)
+
        if isinstance(query, list):
            query = np.array(query)
        if isinstance(query, np.ndarray):
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -1,7 +1,7 @@
 [project]
 name = "lancedb"
-version = "0.1.2"
-dependencies = ["pylance>=0.4.6", "ratelimiter", "retry", "tqdm"]
+version = "0.1.4"
+dependencies = ["pylance>=0.4.17", "ratelimiter", "retry", "tqdm"]
 description = "lancedb"
 authors = [
    { name = "LanceDB Devs", email = "dev@lancedb.com" },
--- a/python/tests/test_embeddings.py
+++ b/python/tests/test_embeddings.py
@@ -14,7 +14,6 @@ import sys

 import numpy as np
 import pyarrow as pa
-
 from lancedb.embeddings import with_embeddings


--- a/python/tests/test_fts.py
+++ b/python/tests/test_fts.py
@@ -0,0 +1,84 @@
+# Copyright 2023 LanceDB Developers
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+import os
+import random
+
+import lancedb.fts
+import numpy as np
+import pandas as pd
+import pytest
+import tantivy
+
+import lancedb as ldb
+
+
+@pytest.fixture
+def table(tmp_path) -> ldb.table.LanceTable:
+    db = ldb.connect(tmp_path)
+    vectors = [np.random.randn(128) for _ in range(100)]
+
+    nouns = ("puppy", "car", "rabbit", "girl", "monkey")
+    verbs = ("runs", "hits", "jumps", "drives", "barfs")
+    adv = ("crazily.", "dutifully.", "foolishly.", "merrily.", "occasionally.")
+    adj = ("adorable", "clueless", "dirty", "odd", "stupid")
+    text = [
+        " ".join(
+            [
+                nouns[random.randrange(0, 5)],
+                verbs[random.randrange(0, 5)],
+                adv[random.randrange(0, 5)],
+                adj[random.randrange(0, 5)],
+            ]
+        )
+        for _ in range(100)
+    ]
+    table = db.create_table(
+        "test", data=pd.DataFrame({"vector": vectors, "text": text, "text2": text})
+    )
+    return table
+
+
+def test_create_index(tmp_path):
+    index = ldb.fts.create_index(str(tmp_path / "index"), ["text"])
+    assert isinstance(index, tantivy.Index)
+    assert os.path.exists(str(tmp_path / "index"))
+
+
+def test_populate_index(tmp_path, table):
+    index = ldb.fts.create_index(str(tmp_path / "index"), ["text"])
+    assert ldb.fts.populate_index(index, table, ["text"]) == len(table)
+
+
+def test_search_index(tmp_path, table):
+    index = ldb.fts.create_index(str(tmp_path / "index"), ["text"])
+    ldb.fts.populate_index(index, table, ["text"])
+    index.reload()
+    results = ldb.fts.search_index(index, query="puppy", limit=10)
+    assert len(results) == 2
+    assert len(results[0]) == 10  # row_ids
+    assert len(results[1]) == 10  # scores
+
+
+def test_create_index_from_table(tmp_path, table):
+    table.create_fts_index("text")
+    df = table.search("puppy").limit(10).select(["text"]).to_df()
+    assert len(df) == 10
+    assert "text" in df.columns
+
+
+def test_create_index_multiple_columns(tmp_path, table):
+    table.create_fts_index(["text", "text2"])
+    df = table.search("puppy").limit(10).to_df()
+    assert len(df) == 10
+    assert "text" in df.columns
+    assert "text2" in df.columns
--- a/python/tests/test_query.py
+++ b/python/tests/test_query.py
@@ -17,7 +17,6 @@ import pandas as pd
 import pandas.testing as tm
 import pyarrow as pa
 import pytest
-
 from lancedb.query import LanceQueryBuilder


--- a/python/tests/test_table.py
+++ b/python/tests/test_table.py
@@ -16,7 +16,6 @@ from pathlib import Path
 import pandas as pd
 import pyarrow as pa
 import pytest
-
 from lancedb.table import LanceTable


--- a/rust/ffi/node/Cargo.toml
+++ b/rust/ffi/node/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "vectordb-node"
-version = "0.1.2"
+version = "0.1.0"
 description = "Serverless, low-latency vector database for AI applications"
 license = "Apache-2.0"
 edition = "2018"
@@ -15,7 +15,7 @@ arrow-ipc = "37.0"
 arrow-schema = "37.0"
 once_cell = "1"
 futures = "0.3"
-lance = "0.4.3"
+lance = "0.4.17"
 vectordb = { path = "../../vectordb" }
 tokio = { version = "1.23", features = ["rt-multi-thread"] }
 neon = {version = "0.10.1", default-features = false, features = ["channel-api", "napi-6", "promise-api", "task-api"] }
--- a/rust/ffi/node/release_process.md
+++ b/rust/ffi/node/release_process.md
@@ -1,85 +0,0 @@
-
-How to release the node module
-
-### 1. Bump the versions
-
-<!-- TODO: we also need to bump the optional dependencies for node! -->
-
-```shell
-pushd rust/vectordb
-cargo bump minor
-popd
-
-pushd rust/ffi/node
-cargo bump minor
-popd
-
-pushd python
-cargo bump minor
-popd
-
-pushd node
-npm version minor
-popd
-
-git add -u
-git commit -m "Bump versions"
-git push
-```
-
-### 2. Push a new tag
-
-```shell
-git tag vX.X.X
-git push --tag vX.X.X
-```
-
-When the tag is pushed, GitHub actions will start building the libraries and
-will upload them to a draft release. Wait for those jobs to complete.
-
-### 3. Publish the release
-
-Once the jobs are complete, you can edit the 
-
-2. Push a tag, such as vX.X.X. Once the tag is pushrf, GitHub actions will start
-   building the native libraries and uploading them to a draft release. Wait for
-   those jobs to complete.
-3. If the libraries are successful, edit the changelog and then publish the
-   release. Once you publish, a new action will start and upload all the 
-   release artifacts to npm.
-
-## Manual process
-
-You can build the artifacts locally on a MacOS machine.
-
-### Build the MacOS release libraries
-
-One-time setup:
-
-```shell
-rustup target add x86_64-apple-darwin aarch64-apple-darwin
-```
-
-To build:
-
-```shell
-bash ci/build_macos_artifacts.sh
-```
-
-### Build the Linux release libraries
-
-One-time setup, building the Docker container
-
-```shell
-cat ci/ubuntu_build.dockerfile | docker build -t lancedb-node-build -
-```
-
-To build:
-
-```shell
-docker run \
-    -v /var/run/docker.sock:/var/run/docker.sock \
-    -v $(pwd):/io -w /io \
-    lancedb-node-build \
-    bash ci/build_linux_artifacts.sh
-```
--- a/rust/ffi/node/src/index.rs
+++ b/rust/ffi/node/src/index.rs
@@ -0,0 +1,15 @@
+// Copyright 2023 Lance Developers.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+pub mod vector;
--- a/rust/ffi/node/src/index/vector.rs
+++ b/rust/ffi/node/src/index/vector.rs
@@ -0,0 +1,128 @@
+// Copyright 2023 Lance Developers.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::convert::TryFrom;
+
+use lance::index::vector::ivf::IvfBuildParams;
+use lance::index::vector::pq::PQBuildParams;
+use lance::index::vector::MetricType;
+use neon::context::FunctionContext;
+use neon::prelude::*;
+
+use vectordb::index::vector::{IvfPQIndexBuilder, VectorIndexBuilder};
+
+use crate::{runtime, JsTable};
+
+pub(crate) fn table_create_vector_index(mut cx: FunctionContext) -> JsResult<JsPromise> {
+    let js_table = cx.this().downcast_or_throw::<JsBox<JsTable>, _>(&mut cx)?;
+    let index_params = cx.argument::<JsObject>(0)?;
+    let index_params_builder = get_index_params_builder(&mut cx, index_params).unwrap();
+
+    let rt = runtime(&mut cx)?;
+    let channel = cx.channel();
+
+    let (deferred, promise) = cx.promise();
+    let table = js_table.table.clone();
+
+    rt.block_on(async move {
+        let add_result = table
+            .lock()
+            .unwrap()
+            .create_index(&index_params_builder)
+            .await;
+
+        deferred.settle_with(&channel, move |mut cx| {
+            add_result
+                .map(|_| cx.undefined())
+                .or_else(|err| cx.throw_error(err.to_string()))
+        });
+    });
+    Ok(promise)
+}
+
+fn get_index_params_builder(
+    cx: &mut FunctionContext,
+    obj: Handle<JsObject>,
+) -> Result<impl VectorIndexBuilder, String> {
+    let idx_type = obj
+        .get::<JsString, _, _>(cx, "type")
+        .map_err(|t| t.to_string())?
+        .value(cx);
+
+    match idx_type.as_str() {
+        "ivf_pq" => {
+            let mut index_builder: IvfPQIndexBuilder = IvfPQIndexBuilder::new();
+            let mut pq_params = PQBuildParams::default();
+
+            obj.get_opt::<JsString, _, _>(cx, "column")
+                .map_err(|t| t.to_string())?
+                .map(|s| index_builder.column(s.value(cx)));
+
+            obj.get_opt::<JsString, _, _>(cx, "index_name")
+                .map_err(|t| t.to_string())?
+                .map(|s| index_builder.index_name(s.value(cx)));
+
+            obj.get_opt::<JsString, _, _>(cx, "metric_type")
+                .map_err(|t| t.to_string())?
+                .map(|s| MetricType::try_from(s.value(cx).as_str()))
+                .map(|mt| {
+                    let metric_type = mt.unwrap();
+                    index_builder.metric_type(metric_type);
+                    pq_params.metric_type = metric_type;
+                });
+
+            let num_partitions = obj
+                .get_opt::<JsNumber, _, _>(cx, "num_partitions")
+                .map_err(|t| t.to_string())?
+                .map(|s| s.value(cx) as usize);
+
+            let max_iters = obj
+                .get_opt::<JsNumber, _, _>(cx, "max_iters")
+                .map_err(|t| t.to_string())?
+                .map(|s| s.value(cx) as usize);
+
+            num_partitions.map(|np| {
+                let max_iters = max_iters.unwrap_or(50);
+                let ivf_params = IvfBuildParams {
+                    num_partitions: np,
+                    max_iters,
+                };
+                index_builder.ivf_params(ivf_params)
+            });
+
+            obj.get_opt::<JsBoolean, _, _>(cx, "use_opq")
+                .map_err(|t| t.to_string())?
+                .map(|s| pq_params.use_opq = s.value(cx));
+
+            obj.get_opt::<JsNumber, _, _>(cx, "num_sub_vectors")
+                .map_err(|t| t.to_string())?
+                .map(|s| pq_params.num_sub_vectors = s.value(cx) as usize);
+
+            obj.get_opt::<JsNumber, _, _>(cx, "num_bits")
+                .map_err(|t| t.to_string())?
+                .map(|s| pq_params.num_bits = s.value(cx) as usize);
+
+            obj.get_opt::<JsNumber, _, _>(cx, "max_iters")
+                .map_err(|t| t.to_string())?
+                .map(|s| pq_params.max_iters = s.value(cx) as usize);
+
+            obj.get_opt::<JsNumber, _, _>(cx, "max_opq_iters")
+                .map_err(|t| t.to_string())?
+                .map(|s| pq_params.max_opq_iters = s.value(cx) as usize);
+
+            Ok(index_builder)
+        }
+        t => Err(format!("{} is not a valid index type", t).to_string()),
+    }
+}
--- a/rust/ffi/node/src/lib.rs
+++ b/rust/ffi/node/src/lib.rs
@@ -13,6 +13,7 @@
 // limitations under the License.

 use std::collections::HashMap;
+use std::convert::TryFrom;
 use std::ops::Deref;
 use std::sync::{Arc, Mutex};

@@ -21,6 +22,7 @@ use arrow_ipc::writer::FileWriter;
 use futures::{TryFutureExt, TryStreamExt};
 use lance::arrow::RecordBatchBuffer;
 use lance::dataset::WriteMode;
+use lance::index::vector::MetricType;
 use neon::prelude::*;
 use neon::types::buffer::TypedArray;
 use once_cell::sync::OnceCell;
@@ -34,17 +36,18 @@ use crate::arrow::arrow_buffer_to_record_batch;

 mod arrow;
 mod convert;
+mod index;

 struct JsDatabase {
    database: Arc<Database>,
 }

+impl Finalize for JsDatabase {}
+
 struct JsTable {
    table: Arc<Mutex<Table>>,
 }

-impl Finalize for JsDatabase {}
-
 impl Finalize for JsTable {}

 fn runtime<'a, C: Context<'a>>(cx: &mut C) -> NeonResult<&'static Runtime> {
@@ -53,23 +56,46 @@ fn runtime<'a, C: Context<'a>>(cx: &mut C) -> NeonResult<&'static Runtime> {
    RUNTIME.get_or_try_init(|| Runtime::new().or_else(|err| cx.throw_error(err.to_string())))
 }

-fn database_new(mut cx: FunctionContext) -> JsResult<JsBox<JsDatabase>> {
+fn database_new(mut cx: FunctionContext) -> JsResult<JsPromise> {
    let path = cx.argument::<JsString>(0)?.value(&mut cx);
-    let db = JsDatabase {
-        database: Arc::new(Database::connect(path).or_else(|err| cx.throw_error(err.to_string()))?),
-    };
-    Ok(cx.boxed(db))
+
+    let rt = runtime(&mut cx)?;
+    let channel = cx.channel();
+    let (deferred, promise) = cx.promise();
+
+    rt.spawn(async move {
+        let database = Database::connect(&path).await;
+
+        deferred.settle_with(&channel, move |mut cx| {
+            let db = JsDatabase {
+                database: Arc::new(database.or_else(|err| cx.throw_error(err.to_string()))?),
+            };
+            Ok(cx.boxed(db))
+        });
+    });
+    Ok(promise)
 }

-fn database_table_names(mut cx: FunctionContext) -> JsResult<JsArray> {
+fn database_table_names(mut cx: FunctionContext) -> JsResult<JsPromise> {
    let db = cx
        .this()
        .downcast_or_throw::<JsBox<JsDatabase>, _>(&mut cx)?;
-    let tables = db
-        .database
-        .table_names()
-        .or_else(|err| cx.throw_error(err.to_string()))?;
-    convert::vec_str_to_array(&tables, &mut cx)
+
+    let rt = runtime(&mut cx)?;
+    let (deferred, promise) = cx.promise();
+    let channel = cx.channel();
+    let database = db.database.clone();
+
+    rt.spawn(async move {
+        let tables_rst = database.table_names().await;
+
+        deferred.settle_with(&channel, move |mut cx| {
+            let tables = tables_rst.or_else(|err| cx.throw_error(err.to_string()))?;
+            let table_names = convert::vec_str_to_array(&tables, &mut cx);
+            table_names
+        });
+    });
+    Ok(promise)
 }

 fn database_open_table(mut cx: FunctionContext) -> JsResult<JsPromise> {
@@ -84,10 +110,12 @@ fn database_open_table(mut cx: FunctionContext) -> JsResult<JsPromise> {

    let (deferred, promise) = cx.promise();
    rt.spawn(async move {
-        let table_rst = database.open_table(table_name).await;
+        let table_rst = database.open_table(&table_name).await;

        deferred.settle_with(&channel, move |mut cx| {
-            let table = Arc::new(Mutex::new(table_rst.or_else(|err| cx.throw_error(err.to_string()))?));
+            let table = Arc::new(Mutex::new(
+                table_rst.or_else(|err| cx.throw_error(err.to_string()))?,
+            ));
            Ok(cx.boxed(JsTable { table }))
        });
    });
@@ -96,15 +124,32 @@ fn database_open_table(mut cx: FunctionContext) -> JsResult<JsPromise> {

 fn table_search(mut cx: FunctionContext) -> JsResult<JsPromise> {
    let js_table = cx.this().downcast_or_throw::<JsBox<JsTable>, _>(&mut cx)?;
-    let query_vector = cx.argument::<JsArray>(0)?; //. .as_value(&mut cx);
-    let limit = cx.argument::<JsNumber>(1)?.value(&mut cx);
-    let filter = cx.argument_opt(2).map(|f| f.downcast_or_throw::<JsString, _>(&mut cx).unwrap().value(&mut cx));
+    let query_obj = cx.argument::<JsObject>(0)?;
+
+    let limit = query_obj
+        .get::<JsNumber, _, _>(&mut cx, "_limit")?
+        .value(&mut cx);
+    let filter = query_obj
+        .get_opt::<JsString, _, _>(&mut cx, "_filter")?
+        .map(|s| s.value(&mut cx));
+    let refine_factor = query_obj
+        .get_opt::<JsNumber, _, _>(&mut cx, "_refineFactor")?
+        .map(|s| s.value(&mut cx))
+        .map(|i| i as u32);
+    let nprobes = query_obj
+        .get::<JsNumber, _, _>(&mut cx, "_nprobes")?
+        .value(&mut cx) as usize;
+    let metric_type = query_obj
+        .get_opt::<JsString, _, _>(&mut cx, "_metricType")?
+        .map(|s| s.value(&mut cx))
+        .map(|s| MetricType::try_from(s.as_str()).unwrap());

    let rt = runtime(&mut cx)?;
    let channel = cx.channel();

    let (deferred, promise) = cx.promise();
    let table = js_table.table.clone();
+    let query_vector = query_obj.get::<JsArray, _, _>(&mut cx, "_queryVector")?;
    let query = convert::js_array_to_vec(query_vector.deref(), &mut cx);

    rt.spawn(async move {
@@ -113,7 +158,10 @@ fn table_search(mut cx: FunctionContext) -> JsResult<JsPromise> {
            .unwrap()
            .search(Float32Array::from(query))
            .limit(limit as usize)
-            .filter(filter);
+            .refine_factor(refine_factor)
+            .nprobes(nprobes)
+            .filter(filter)
+            .metric_type(metric_type);
        let record_batch_stream = builder.execute();
        let results = record_batch_stream
            .and_then(|stream| stream.try_collect::<Vec<_>>().map_err(Error::from))
@@ -161,10 +209,12 @@ fn table_create(mut cx: FunctionContext) -> JsResult<JsPromise> {

    rt.block_on(async move {
        let batch_reader: Box<dyn RecordBatchReader> = Box::new(RecordBatchBuffer::new(batches));
-        let table_rst = database.create_table(table_name, batch_reader).await;
+        let table_rst = database.create_table(&table_name, batch_reader).await;

        deferred.settle_with(&channel, move |mut cx| {
-            let table = Arc::new(Mutex::new(table_rst.or_else(|err| cx.throw_error(err.to_string()))?));
+            let table = Arc::new(Mutex::new(
+                table_rst.or_else(|err| cx.throw_error(err.to_string()))?,
+            ));
            Ok(cx.boxed(JsTable { table }))
        });
    });
@@ -178,9 +228,7 @@ fn table_add(mut cx: FunctionContext) -> JsResult<JsPromise> {
        ("overwrite", WriteMode::Overwrite),
    ]);

-    let js_table = cx
-        .this()
-        .downcast_or_throw::<JsBox<JsTable>, _>(&mut cx)?;
+    let js_table = cx.this().downcast_or_throw::<JsBox<JsTable>, _>(&mut cx)?;
    let buffer = cx.argument::<JsBuffer>(0)?;
    let write_mode = cx.argument::<JsString>(1)?.value(&mut cx);
    let batches = arrow_buffer_to_record_batch(buffer.as_slice(&mut cx));
@@ -204,7 +252,6 @@ fn table_add(mut cx: FunctionContext) -> JsResult<JsPromise> {
    Ok(promise)
 }

-
 #[neon::main]
 fn main(mut cx: ModuleContext) -> NeonResult<()> {
    cx.export_function("databaseNew", database_new)?;
@@ -213,5 +260,9 @@ fn main(mut cx: ModuleContext) -> NeonResult<()> {
    cx.export_function("tableSearch", table_search)?;
    cx.export_function("tableCreate", table_create)?;
    cx.export_function("tableAdd", table_add)?;
+    cx.export_function(
+        "tableCreateVectorIndex",
+        index::vector::table_create_vector_index,
+    )?;
    Ok(())
 }
--- a/rust/vectordb/Cargo.toml
+++ b/rust/vectordb/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "vectordb"
-version = "0.1.2"
+version = "0.0.1"
 edition = "2021"
 description = "Serverless, low-latency vector database for AI applications"
 license = "Apache-2.0"
@@ -10,9 +10,13 @@ repository = "https://github.com/lancedb/lancedb"

 [dependencies]
 arrow-array = "37.0"
+arrow-data = "37.0"
 arrow-schema = "37.0"
-lance = "0.4.3"
+object_store = "0.5.6"
+
+lance = "0.4.17"
 tokio = { version = "1.23", features = ["rt-multi-thread"] }

 [dev-dependencies]
 tempfile = "3.5.0"
+rand = { version = "0.8.3", features = ["small_rng"] }
--- a/rust/vectordb/src/database.rs
+++ b/rust/vectordb/src/database.rs
@@ -12,16 +12,19 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-use arrow_array::RecordBatchReader;
 use std::fs::create_dir_all;
-use std::path::{Path, PathBuf};
-use std::sync::Arc;
+use std::path::Path;
+
+use arrow_array::RecordBatchReader;
+use lance::io::object_store::ObjectStore;

 use crate::error::Result;
 use crate::table::Table;

 pub struct Database {
-    pub(crate) path: Arc<PathBuf>,
+    object_store: ObjectStore,
+
+    pub(crate) uri: String,
 }

 const LANCE_EXTENSION: &str = "lance";
@@ -37,12 +40,17 @@ impl Database {
    /// # Returns
    ///
    /// * A [Database] object.
-    pub fn connect<P: AsRef<Path>>(path: P) -> Result<Database> {
-        if !path.as_ref().try_exists()? {
-            create_dir_all(&path)?;
+    pub async fn connect(uri: &str) -> Result<Database> {
+        let object_store = ObjectStore::new(uri).await?;
+        if object_store.is_local() {
+            let path = Path::new(uri);
+            if !path.try_exists()? {
+                create_dir_all(&path)?;
+            }
        }
        Ok(Database {
-            path: Arc::new(path.as_ref().to_path_buf()),
+            uri: uri.to_string(),
+            object_store,
        })
    }

@@ -51,12 +59,13 @@ impl Database {
    /// # Returns
    ///
    /// * A [Vec<String>] with all table names.
-    pub fn table_names(&self) -> Result<Vec<String>> {
+    pub async fn table_names(&self) -> Result<Vec<String>> {
        let f = self
-            .path
-            .read_dir()?
-            .flatten()
-            .map(|dir_entry| dir_entry.path())
+            .object_store
+            .read_dir("/")
+            .await?
+            .iter()
+            .map(|fname| Path::new(fname))
            .filter(|path| {
                let is_lance = path
                    .extension()
@@ -76,10 +85,10 @@ impl Database {

    pub async fn create_table(
        &self,
-        name: String,
+        name: &str,
        batches: Box<dyn RecordBatchReader>,
    ) -> Result<Table> {
-        Table::create(self.path.clone(), name, batches).await
+        Table::create(&self.uri, name, batches).await
    }

    /// Open a table in the database.
@@ -90,8 +99,8 @@ impl Database {
    /// # Returns
    ///
    /// * A [Table] object.
-    pub async fn open_table(&self, name: String) -> Result<Table> {
-        Table::open(self.path.clone(), name).await
+    pub async fn open_table(&self, name: &str) -> Result<Table> {
+        Table::open(&self.uri, name).await
    }
 }

@@ -105,10 +114,10 @@ mod tests {
    #[tokio::test]
    async fn test_connect() {
        let tmp_dir = tempdir().unwrap();
-        let path_buf = tmp_dir.into_path();
-        let db = Database::connect(&path_buf);
+        let uri = tmp_dir.path().to_str().unwrap();
+        let db = Database::connect(uri).await.unwrap();

-        assert_eq!(db.unwrap().path.as_path(), path_buf.as_path())
+        assert_eq!(db.uri, uri);
    }

    #[tokio::test]
@@ -118,10 +127,16 @@ mod tests {
        create_dir_all(tmp_dir.path().join("table2.lance")).unwrap();
        create_dir_all(tmp_dir.path().join("invalidlance")).unwrap();

-        let db = Database::connect(&tmp_dir.into_path()).unwrap();
-        let tables = db.table_names().unwrap();
+        let uri = tmp_dir.path().to_str().unwrap();
+        let db = Database::connect(uri).await.unwrap();
+        let tables = db.table_names().await.unwrap();
        assert_eq!(tables.len(), 2);
        assert!(tables.contains(&String::from("table1")));
        assert!(tables.contains(&String::from("table2")));
    }
+
+    #[tokio::test]
+    async fn test_connect_s3() {
+        // let db = Database::connect("s3://bucket/path/to/database").await.unwrap();
+    }
 }
--- a/rust/vectordb/src/error.rs
+++ b/rust/vectordb/src/error.rs
@@ -41,3 +41,15 @@ impl From<lance::Error> for Error {
        Self::Lance(e.to_string())
    }
 }
+
+impl From<object_store::Error> for Error {
+    fn from(e: object_store::Error) -> Self {
+        Self::IO(e.to_string())
+    }
+}
+
+impl From<object_store::path::Error> for Error {
+    fn from(e: object_store::path::Error) -> Self {
+        Self::IO(e.to_string())
+    }
+}
--- a/rust/vectordb/src/index.rs
+++ b/rust/vectordb/src/index.rs
@@ -0,0 +1,15 @@
+// Copyright 2023 Lance Developers.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+pub mod vector;
--- a/rust/vectordb/src/index/vector.rs
+++ b/rust/vectordb/src/index/vector.rs
@@ -0,0 +1,163 @@
+// Copyright 2023 Lance Developers.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use lance::index::vector::ivf::IvfBuildParams;
+use lance::index::vector::pq::PQBuildParams;
+use lance::index::vector::{MetricType, VectorIndexParams};
+
+pub trait VectorIndexBuilder {
+    fn get_column(&self) -> Option<String>;
+    fn get_index_name(&self) -> Option<String>;
+    fn build(&self) -> VectorIndexParams;
+}
+
+pub struct IvfPQIndexBuilder {
+    column: Option<String>,
+    index_name: Option<String>,
+    metric_type: Option<MetricType>,
+    ivf_params: Option<IvfBuildParams>,
+    pq_params: Option<PQBuildParams>,
+}
+
+impl IvfPQIndexBuilder {
+    pub fn new() -> IvfPQIndexBuilder {
+        IvfPQIndexBuilder {
+            column: None,
+            index_name: None,
+            metric_type: None,
+            ivf_params: None,
+            pq_params: None,
+        }
+    }
+}
+
+impl IvfPQIndexBuilder {
+    pub fn column(&mut self, column: String) -> &mut IvfPQIndexBuilder {
+        self.column = Some(column);
+        self
+    }
+
+    pub fn index_name(&mut self, index_name: String) -> &mut IvfPQIndexBuilder {
+        self.index_name = Some(index_name);
+        self
+    }
+
+    pub fn metric_type(&mut self, metric_type: MetricType) -> &mut IvfPQIndexBuilder {
+        self.metric_type = Some(metric_type);
+        self
+    }
+
+    pub fn ivf_params(&mut self, ivf_params: IvfBuildParams) -> &mut IvfPQIndexBuilder {
+        self.ivf_params = Some(ivf_params);
+        self
+    }
+
+    pub fn pq_params(&mut self, pq_params: PQBuildParams) -> &mut IvfPQIndexBuilder {
+        self.pq_params = Some(pq_params);
+        self
+    }
+}
+
+impl VectorIndexBuilder for IvfPQIndexBuilder {
+    fn get_column(&self) -> Option<String> {
+        self.column.clone()
+    }
+
+    fn get_index_name(&self) -> Option<String> {
+        self.index_name.clone()
+    }
+
+    fn build(&self) -> VectorIndexParams {
+        let ivf_params = self.ivf_params.clone().unwrap_or(IvfBuildParams::default());
+        let pq_params = self.pq_params.clone().unwrap_or(PQBuildParams::default());
+
+        VectorIndexParams::with_ivf_pq_params(pq_params.metric_type, ivf_params, pq_params)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use lance::index::vector::ivf::IvfBuildParams;
+    use lance::index::vector::pq::PQBuildParams;
+    use lance::index::vector::{MetricType, StageParams};
+
+    use crate::index::vector::{IvfPQIndexBuilder, VectorIndexBuilder};
+
+    #[test]
+    fn test_builder_no_params() {
+        let index_builder = IvfPQIndexBuilder::new();
+        assert!(index_builder.get_column().is_none());
+        assert!(index_builder.get_index_name().is_none());
+
+        let index_params = index_builder.build();
+        assert_eq!(index_params.stages.len(), 2);
+        if let StageParams::Ivf(ivf_params) = index_params.stages.get(0).unwrap() {
+            let default = IvfBuildParams::default();
+            assert_eq!(ivf_params.num_partitions, default.num_partitions);
+            assert_eq!(ivf_params.max_iters, default.max_iters);
+        } else {
+            panic!("Expected first stage to be ivf")
+        }
+
+        if let StageParams::PQ(pq_params) = index_params.stages.get(1).unwrap() {
+            assert_eq!(pq_params.use_opq, false);
+        } else {
+            panic!("Expected second stage to be pq")
+        }
+    }
+
+    #[test]
+    fn test_builder_all_params() {
+        let mut index_builder = IvfPQIndexBuilder::new();
+
+        index_builder
+            .column("c".to_owned())
+            .metric_type(MetricType::Cosine)
+            .index_name("index".to_owned());
+
+        assert_eq!(index_builder.column.clone().unwrap(), "c");
+        assert_eq!(index_builder.metric_type.unwrap(), MetricType::Cosine);
+        assert_eq!(index_builder.index_name.clone().unwrap(), "index");
+
+        let ivf_params = IvfBuildParams::new(500);
+        let mut pq_params = PQBuildParams::default();
+        pq_params.use_opq = true;
+        pq_params.max_iters = 1;
+        pq_params.num_bits = 8;
+        pq_params.num_sub_vectors = 50;
+        pq_params.metric_type = MetricType::Cosine;
+        pq_params.max_opq_iters = 2;
+        index_builder.ivf_params(ivf_params);
+        index_builder.pq_params(pq_params);
+
+        let index_params = index_builder.build();
+        assert_eq!(index_params.stages.len(), 2);
+        if let StageParams::Ivf(ivf_params) = index_params.stages.get(0).unwrap() {
+            assert_eq!(ivf_params.num_partitions, 500);
+        } else {
+            assert!(false, "Expected first stage to be ivf")
+        }
+
+        if let StageParams::PQ(pq_params) = index_params.stages.get(1).unwrap() {
+            assert_eq!(pq_params.use_opq, true);
+            assert_eq!(pq_params.max_iters, 1);
+            assert_eq!(pq_params.num_bits, 8);
+            assert_eq!(pq_params.num_sub_vectors, 50);
+            assert_eq!(pq_params.metric_type, MetricType::Cosine);
+            assert_eq!(pq_params.max_opq_iters, 2);
+        } else {
+            assert!(false, "Expected second stage to be pq")
+        }
+    }
+}
--- a/rust/vectordb/src/lib.rs
+++ b/rust/vectordb/src/lib.rs
@@ -14,5 +14,6 @@

 pub mod database;
 pub mod error;
+pub mod index;
 pub mod query;
 pub mod table;
--- a/rust/vectordb/src/query.rs
+++ b/rust/vectordb/src/query.rs
@@ -29,7 +29,7 @@ pub struct Query {
    pub filter: Option<String>,
    pub nprobes: usize,
    pub refine_factor: Option<u32>,
-    pub metric_type: MetricType,
+    pub metric_type: Option<MetricType>,
    pub use_index: bool,
 }

@@ -51,9 +51,9 @@ impl Query {
            limit: 10,
            nprobes: 20,
            refine_factor: None,
-            metric_type: MetricType::L2,
+            metric_type: None,
            use_index: false,
-            filter: None
+            filter: None,
        }
    }

@@ -71,10 +71,10 @@ impl Query {
            self.limit,
        )?;
        scanner.nprobs(self.nprobes);
-        scanner.distance_metric(self.metric_type);
        scanner.use_index(self.use_index);
        self.filter.as_ref().map(|f| scanner.filter(f));
        self.refine_factor.map(|rf| scanner.refine(rf));
+        self.metric_type.map(|mt| scanner.distance_metric(mt));
        Ok(scanner.try_into_stream().await?)
    }

@@ -123,7 +123,7 @@ impl Query {
    /// # Arguments
    ///
    /// * `metric_type` - The distance metric to use. By default [MetricType::L2] is used.
-    pub fn metric_type(mut self, metric_type: MetricType) -> Query {
+    pub fn metric_type(mut self, metric_type: Option<MetricType>) -> Query {
        self.metric_type = metric_type;
        self
    }
@@ -174,14 +174,14 @@ mod tests {
            .limit(100)
            .nprobes(1000)
            .use_index(true)
-            .metric_type(MetricType::Cosine)
+            .metric_type(Some(MetricType::Cosine))
            .refine_factor(Some(999));

        assert_eq!(query.query_vector, new_vector);
        assert_eq!(query.limit, 100);
        assert_eq!(query.nprobes, 1000);
        assert_eq!(query.use_index, true);
-        assert_eq!(query.metric_type, MetricType::Cosine);
+        assert_eq!(query.metric_type, Some(MetricType::Cosine));
        assert_eq!(query.refine_factor, Some(999));
    }

--- a/rust/vectordb/src/table.rs
+++ b/rust/vectordb/src/table.rs
@@ -12,26 +12,33 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-use std::path::PathBuf;
+use std::path::Path;
 use std::sync::Arc;

 use arrow_array::{Float32Array, RecordBatchReader};
 use lance::dataset::{Dataset, WriteMode, WriteParams};
+use lance::index::IndexType;

 use crate::error::{Error, Result};
+use crate::index::vector::VectorIndexBuilder;
 use crate::query::Query;

 pub const VECTOR_COLUMN_NAME: &str = "vector";
-
 pub const LANCE_FILE_EXTENSION: &str = "lance";

 /// A table in a LanceDB database.
 pub struct Table {
    name: String,
-    path: String,
+    uri: String,
    dataset: Arc<Dataset>,
 }

+impl std::fmt::Display for Table {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "Table({})", self.name)
+    }
+}
+
 impl Table {
    /// Opens an existing Table
    ///
@@ -43,18 +50,21 @@ impl Table {
    /// # Returns
    ///
    /// * A [Table] object.
-    pub async fn open(base_path: Arc<PathBuf>, name: String) -> Result<Self> {
-        let ds_path = base_path.join(format!("{}.{}", name, LANCE_FILE_EXTENSION));
-        let ds_uri = ds_path
+    pub async fn open(base_uri: &str, name: &str) -> Result<Self> {
+        let path = Path::new(base_uri);
+
+        let table_uri = path.join(format!("{}.{}", name, LANCE_FILE_EXTENSION));
+        let uri = table_uri
+            .as_path()
            .to_str()
-            .ok_or(Error::IO(format!("Unable to find table {}", name)))?;
-        let dataset = Dataset::open(ds_uri).await?;
-        let table = Table {
-            name,
-            path: ds_uri.to_string(),
+            .ok_or(Error::IO(format!("Invalid table name: {}", name)))?;
+
+        let dataset = Dataset::open(&uri).await?;
+        Ok(Table {
+            name: name.to_string(),
+            uri: uri.to_string(),
            dataset: Arc::new(dataset),
-        };
-        Ok(table)
+        })
    }

    /// Creates a new Table
@@ -69,18 +79,44 @@ impl Table {
    ///
    /// * A [Table] object.
    pub async fn create(
-        base_path: Arc<PathBuf>,
-        name: String,
+        base_uri: &str,
+        name: &str,
        mut batches: Box<dyn RecordBatchReader>,
    ) -> Result<Self> {
-        let ds_path = base_path.join(format!("{}.{}", name, LANCE_FILE_EXTENSION));
-        let path = ds_path
+        let base_path = Path::new(base_uri);
+        let table_uri = base_path.join(format!("{}.{}", name, LANCE_FILE_EXTENSION));
+        let uri = table_uri
+            .as_path()
            .to_str()
-            .ok_or(Error::IO(format!("Unable to find table {}", name)))?;
-
+            .ok_or(Error::IO(format!("Invalid table name: {}", name)))?
+            .to_string();
        let dataset =
-            Arc::new(Dataset::write(&mut batches, path, Some(WriteParams::default())).await?);
-        Ok(Table { name, path: path.to_string(), dataset })
+            Arc::new(Dataset::write(&mut batches, &uri, Some(WriteParams::default())).await?);
+        Ok(Table {
+            name: name.to_string(),
+            uri,
+            dataset,
+        })
+    }
+
+    /// Create index on the table.
+    pub async fn create_index(&mut self, index_builder: &impl VectorIndexBuilder) -> Result<()> {
+        use lance::index::DatasetIndexExt;
+
+        let dataset = self
+            .dataset
+            .create_index(
+                &[index_builder
+                    .get_column()
+                    .unwrap_or(VECTOR_COLUMN_NAME.to_string())
+                    .as_str()],
+                IndexType::Vector,
+                index_builder.get_index_name(),
+                &index_builder.build(),
+            )
+            .await?;
+        self.dataset = Arc::new(dataset);
+        Ok(())
    }

    /// Insert records into this Table
@@ -95,12 +131,12 @@ impl Table {
    pub async fn add(
        &mut self,
        mut batches: Box<dyn RecordBatchReader>,
-        write_mode: Option<WriteMode>
+        write_mode: Option<WriteMode>,
    ) -> Result<usize> {
        let mut params = WriteParams::default();
        params.mode = write_mode.unwrap_or(WriteMode::Append);

-        self.dataset = Arc::new(Dataset::write(&mut batches, self.path.as_str(), Some(params)).await?);
+        self.dataset = Arc::new(Dataset::write(&mut batches, &self.uri, Some(params)).await?);
        Ok(batches.count())
    }

@@ -125,60 +161,72 @@ impl Table {

 #[cfg(test)]
 mod tests {
-    use arrow_array::{Float32Array, Int32Array, RecordBatch, RecordBatchReader};
+    use std::sync::Arc;
+
+    use arrow_array::{
+        Array, FixedSizeListArray, Float32Array, Int32Array, RecordBatch, RecordBatchReader,
+    };
+    use arrow_data::ArrayDataBuilder;
    use arrow_schema::{DataType, Field, Schema};
    use lance::arrow::RecordBatchBuffer;
    use lance::dataset::{Dataset, WriteMode};
-    use std::sync::Arc;
+    use lance::index::vector::ivf::IvfBuildParams;
+    use lance::index::vector::pq::PQBuildParams;
+    use rand::Rng;
    use tempfile::tempdir;

-    use crate::table::Table;
+    use super::*;
+    use crate::index::vector::IvfPQIndexBuilder;

    #[tokio::test]
    async fn test_new_table_not_exists() {
        let tmp_dir = tempdir().unwrap();
-        let path_buf = tmp_dir.into_path();
+        let uri = tmp_dir.path().to_str().unwrap();

-        let table = Table::open(Arc::new(path_buf), "test".to_string()).await;
+        let table = Table::open(&uri, "test").await;
        assert!(table.is_err());
    }

    #[tokio::test]
    async fn test_open() {
        let tmp_dir = tempdir().unwrap();
-        let path_buf = tmp_dir.into_path();
+        let dataset_path = tmp_dir.path().join("test.lance");
+        let uri = tmp_dir.path().to_str().unwrap();

        let mut batches: Box<dyn RecordBatchReader> = Box::new(make_test_batches());
-        Dataset::write(
-            &mut batches,
-            path_buf.join("test.lance").to_str().unwrap(),
-            None,
-        )
-        .await
-        .unwrap();
-
-        let table = Table::open(Arc::new(path_buf), "test".to_string())
+        Dataset::write(&mut batches, dataset_path.to_str().unwrap(), None)
            .await
            .unwrap();

+        let table = Table::open(uri, "test").await.unwrap();
+
        assert_eq!(table.name, "test")
    }

+    #[test]
+    fn test_object_store_path() {
+        use std::path::Path as StdPath;
+        let p = StdPath::new("s3://bucket/path/to/file");
+        let c = p.join("subfile");
+        assert_eq!(c.to_str().unwrap(), "s3://bucket/path/to/file/subfile");
+    }
+
    #[tokio::test]
    async fn test_add() {
        let tmp_dir = tempdir().unwrap();
-        let path_buf = tmp_dir.into_path();
+        let uri = tmp_dir.path().to_str().unwrap();

        let batches: Box<dyn RecordBatchReader> = Box::new(make_test_batches());
        let schema = batches.schema().clone();
-        let mut table = Table::create(Arc::new(path_buf), "test".to_string(), batches).await.unwrap();
+        let mut table = Table::create(&uri, "test", batches).await.unwrap();
        assert_eq!(table.count_rows().await.unwrap(), 10);

-        let new_batches: Box<dyn RecordBatchReader> = Box::new(RecordBatchBuffer::new(vec![RecordBatch::try_new(
-            schema,
-            vec![Arc::new(Int32Array::from_iter_values(100..110))],
-        )
-       .unwrap()]));
+        let new_batches: Box<dyn RecordBatchReader> =
+            Box::new(RecordBatchBuffer::new(vec![RecordBatch::try_new(
+                schema,
+                vec![Arc::new(Int32Array::from_iter_values(100..110))],
+            )
+            .unwrap()]));

        table.add(new_batches, None).await.unwrap();
        assert_eq!(table.count_rows().await.unwrap(), 20);
@@ -188,19 +236,24 @@ mod tests {
    #[tokio::test]
    async fn test_add_overwrite() {
        let tmp_dir = tempdir().unwrap();
-        let path_buf = tmp_dir.into_path();
+        let uri = tmp_dir.path().to_str().unwrap();

        let batches: Box<dyn RecordBatchReader> = Box::new(make_test_batches());
        let schema = batches.schema().clone();
-        let mut table = Table::create(Arc::new(path_buf), "test".to_string(), batches).await.unwrap();
+        let mut table = Table::create(uri, "test", batches).await.unwrap();
        assert_eq!(table.count_rows().await.unwrap(), 10);

-        let new_batches: Box<dyn RecordBatchReader> = Box::new(RecordBatchBuffer::new(vec![RecordBatch::try_new(
-            schema,
-            vec![Arc::new(Int32Array::from_iter_values(100..110))],
-        ).unwrap()]));
+        let new_batches: Box<dyn RecordBatchReader> =
+            Box::new(RecordBatchBuffer::new(vec![RecordBatch::try_new(
+                schema,
+                vec![Arc::new(Int32Array::from_iter_values(100..110))],
+            )
+            .unwrap()]));

-        table.add(new_batches, Some(WriteMode::Overwrite)).await.unwrap();
+        table
+            .add(new_batches, Some(WriteMode::Overwrite))
+            .await
+            .unwrap();
        assert_eq!(table.count_rows().await.unwrap(), 10);
        assert_eq!(table.name, "test");
    }
@@ -208,21 +261,16 @@ mod tests {
    #[tokio::test]
    async fn test_search() {
        let tmp_dir = tempdir().unwrap();
-        let path_buf = tmp_dir.into_path();
+        let dataset_path = tmp_dir.path().join("test.lance");
+        let uri = tmp_dir.path().to_str().unwrap();

        let mut batches: Box<dyn RecordBatchReader> = Box::new(make_test_batches());
-        Dataset::write(
-            &mut batches,
-            path_buf.join("test.lance").to_str().unwrap(),
-            None,
-        )
-        .await
-        .unwrap();
-
-        let table = Table::open(Arc::new(path_buf), "test".to_string())
+        Dataset::write(&mut batches, dataset_path.to_str().unwrap(), None)
            .await
            .unwrap();

+        let table = Table::open(uri, "test").await.unwrap();
+
        let vector = Float32Array::from_iter_values([0.1, 0.2]);
        let query = table.search(vector.clone());
        assert_eq!(vector, query.query_vector);
@@ -236,4 +284,72 @@ mod tests {
        )
        .unwrap()])
    }
+
+    #[tokio::test]
+    async fn test_create_index() {
+        use arrow_array::RecordBatch;
+        use arrow_schema::{DataType, Field, Schema as ArrowSchema};
+        use rand;
+        use std::iter::repeat_with;
+
+        use arrow_array::Float32Array;
+
+        let tmp_dir = tempdir().unwrap();
+        let uri = tmp_dir.path().to_str().unwrap();
+
+        let dimension = 16;
+        let schema = Arc::new(ArrowSchema::new(vec![Field::new(
+            "embeddings",
+            DataType::FixedSizeList(
+                Arc::new(Field::new("item", DataType::Float32, true)),
+                dimension,
+            ),
+            false,
+        )]));
+
+        let mut rng = rand::thread_rng();
+        let float_arr = Float32Array::from(
+            repeat_with(|| rng.gen::<f32>())
+                .take(512 * dimension as usize)
+                .collect::<Vec<f32>>(),
+        );
+
+        let vectors = Arc::new(create_fixed_size_list(float_arr, dimension).unwrap());
+        let batches = RecordBatchBuffer::new(vec![RecordBatch::try_new(
+            schema.clone(),
+            vec![vectors.clone()],
+        )
+        .unwrap()]);
+
+        let reader: Box<dyn RecordBatchReader + Send> = Box::new(batches);
+        let mut table = Table::create(uri, "test", reader).await.unwrap();
+
+        let mut i = IvfPQIndexBuilder::new();
+
+        let index_builder = i
+            .column("embeddings".to_string())
+            .index_name("my_index".to_string())
+            .ivf_params(IvfBuildParams::new(256))
+            .pq_params(PQBuildParams::default());
+
+        table.create_index(index_builder).await.unwrap();
+
+        assert_eq!(table.dataset.load_indices().await.unwrap().len(), 1);
+        assert_eq!(table.count_rows().await.unwrap(), 512);
+        assert_eq!(table.name, "test");
+    }
+
+    fn create_fixed_size_list<T: Array>(values: T, list_size: i32) -> Result<FixedSizeListArray> {
+        let list_type = DataType::FixedSizeList(
+            Arc::new(Field::new("item", values.data_type().clone(), true)),
+            list_size,
+        );
+        let data = ArrayDataBuilder::new(list_type)
+            .len(values.len() / list_size as usize)
+            .add_child_data(values.into_data())
+            .build()
+            .unwrap();
+
+        Ok(FixedSizeListArray::from(data))
+    }
 }
Author	SHA1	Message	Date
Chang She	2b26775ed1	python v0.1.4	2023-05-31 20:11:25 -07:00
Lei Xu	306ada5cb8	Support S3 and GCS from typescript SDK (#106 )	2023-05-30 21:32:17 -07:00
gsilvestrin	d3aa8bfbc5	add embedding functions to the nodejs client (#95 )	2023-05-26 18:09:20 -07:00
Chang She	04d97347d7	move tantivy-py installation to be separate from wheel (#97 ) pypi does not allow packages to be uploaded that has a direct reference for now we'll just ask the user to install tantivy separately --------- Co-authored-by: Chang She <chang@lancedb.com>	2023-05-25 17:57:26 -06:00
Chang She	22aa8a93c2	bump version for v0.1.3	2023-05-25 17:01:52 -06:00
Chang She	f485378ea4	Basic full text search capabilities (#62 ) This is v1 of integrating full text search index into LanceDB. # API The query API is roughly the same as before, except if the input is text instead of a vector we assume that its fts search. ## Example If `table` is a LanceDB LanceTable, then: Build index: `table.create_fts_index("text")` Query: `df = table.search("puppy").limit(10).select(["text"]).to_df()` # Implementation Here we use the tantivy-py package to build the index. We then use the row id's as the full-text-search index's doc id then we just do a Take operation to fetch the rows. # Limitations 1. don't support incremental row appends yet. New data won't show up in search 2. local filesystem only 3. requires building tantivy explicitly --------- Co-authored-by: Chang She <chang@lancedb.com>	2023-05-24 22:25:31 -06:00
gsilvestrin	f923cfe47f	add create index to nodejs client (#89 )	2023-05-24 16:45:58 -06:00
gsilvestrin	06cb7b6458	add query params to to nodejs client (#87 )	2023-05-24 15:48:31 -06:00
gsilvestrin	bdef634954	bugfix: string columns should be converted to Utf8Array (#94 )	2023-05-23 14:58:49 -07:00