Compare commits


17 Commits

Author SHA1 Message Date
Lei Xu       2704a4522c  Bump to 0.1.11                                                         2023-07-17 12:45:17 -07:00
Lei Xu       030f07e7f0  Bump minimal lance version to 0.5.8 (#318)                             2023-07-17 12:41:29 -07:00
gsilvestrin  72afa06b7a  feat(node): Add Windows support (#294)                                 2023-07-17 08:48:24 -07:00
Lei Xu       088e745e1d  [Python] Create table with Iterator[RecordBatch] and add docs (#316)   2023-07-16 21:45:55 -07:00
Lei Xu       7a57cddb2c  [Python] Add records to remote (#315)                                  2023-07-16 13:24:38 -07:00
Lei Xu       8ff5f88916  [Python] Bug fixes in remote API (#314)                                2023-07-16 11:09:19 -07:00
Lei Xu       028a6e433d  [Python] Get table schema (#313)                                       2023-07-15 17:39:37 -07:00
Lei Xu       04c6814fb1  [Rust] Expose Table schema and version in Rust (#312)                  2023-07-14 22:01:23 -07:00
Lei Xu       c62e4ca1eb  Bump lance version to 0.5.7 (#311)                                     2023-07-14 17:17:31 -07:00
gsilvestrin  aecc5fc42b  feat(node): Fix npm publish task (#298)                                2023-07-14 13:39:15 -07:00
Chang She    2fdcb307eb  [python] Fix a few minor bugs (#304)                                   2023-07-15 03:47:42 +08:00
Tevin Wang   ad18826579  [Documentation Code Testing] build node sdk in release (#307)          2023-07-14 12:46:48 -07:00
Leon Yee     a8a50591d7  [docs] small fixes (#308) (closes #288 and #287)                       2023-07-14 12:46:31 -07:00
gsilvestrin  6dfe7fabc2  pin half (#310)                                                        2023-07-14 12:45:05 -07:00
gsilvestrin  2b108e1c80  Updating package-lock.json file (#301)                                 2023-07-13 17:50:01 -07:00
Lei Xu       8c9edafccc  [Doc] Add more Python integrations documents (#299)                    2023-07-13 17:09:39 -07:00
Leon Yee     0590413b96  Added transformersJS example to docs and node/examples (#297)          2023-07-13 17:01:36 -07:00
33 changed files with 719 additions and 528 deletions

View File

@@ -81,7 +81,7 @@ jobs:
         run: |
           cd docs/test/node_modules/vectordb
           npm ci
-          npm run build
+          npm run build-release
           npm run tsc
       - name: Create test files
         run: |

View File

@@ -116,6 +116,39 @@ jobs:
           path: |
             node/dist/vectordb-linux*.tgz
+  node-windows:
+    runs-on: windows-2022
+    # Only runs on tags that matches the make-release action
+    if: startsWith(github.ref, 'refs/tags/v')
+    strategy:
+      fail-fast: false
+      matrix:
+        target: [x86_64-pc-windows-msvc]
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
+      - name: Install Protoc v21.12
+        working-directory: C:\
+        run: |
+          New-Item -Path 'C:\protoc' -ItemType Directory
+          Set-Location C:\protoc
+          Invoke-WebRequest https://github.com/protocolbuffers/protobuf/releases/download/v21.12/protoc-21.12-win64.zip -OutFile C:\protoc\protoc.zip
+          7z x protoc.zip
+          Add-Content $env:GITHUB_PATH "C:\protoc\bin"
+        shell: powershell
+      - name: Install npm dependencies
+        run: |
+          cd node
+          npm ci
+      - name: Build Windows native node modules
+        run: .\ci\build_windows_artifacts.ps1 ${{ matrix.target }}
+      - name: Upload Windows Artifacts
+        uses: actions/upload-artifact@v3
+        with:
+          name: windows-native
+          path: |
+            node/dist/vectordb-win32*.tgz
   release:
     needs: [node, node-macos, node-linux]
     runs-on: ubuntu-latest
@@ -132,6 +165,7 @@ jobs:
         env:
           NODE_AUTH_TOKEN: ${{ secrets.LANCEDB_NPM_REGISTRY_TOKEN }}
         run: |
-          for filename in */*.tgz; do
+          mv */*.tgz .
+          for filename in *.tgz; do
             npm publish $filename
           done

View File

@@ -66,3 +66,24 @@ jobs:
         run: cargo build --all-features
       - name: Run tests
         run: cargo test --all-features
+  windows:
+    runs-on: windows-2022
+    steps:
+      - uses: actions/checkout@v3
+      - uses: Swatinem/rust-cache@v2
+        with:
+          workspaces: rust
+      - name: Install Protoc v21.12
+        working-directory: C:\
+        run: |
+          New-Item -Path 'C:\protoc' -ItemType Directory
+          Set-Location C:\protoc
+          Invoke-WebRequest https://github.com/protocolbuffers/protobuf/releases/download/v21.12/protoc-21.12-win64.zip -OutFile C:\protoc\protoc.zip
+          7z x protoc.zip
+          Add-Content $env:GITHUB_PATH "C:\protoc\bin"
+        shell: powershell
+      - name: Run tests
+        run: |
+          $env:VCPKG_ROOT = $env:VCPKG_INSTALLATION_ROOT
+          cargo build
+          cargo test

View File

@@ -6,9 +6,11 @@ members = [
resolver = "2" resolver = "2"
[workspace.dependencies] [workspace.dependencies]
lance = "=0.5.5" lance = "=0.5.8"
arrow-array = "42.0" arrow-array = "42.0"
arrow-data = "42.0" arrow-data = "42.0"
arrow-schema = "42.0" arrow-schema = "42.0"
arrow-ipc = "42.0" arrow-ipc = "42.0"
half = { "version" = "2.2.1", default-features = false }
object_store = "0.6.1" object_store = "0.6.1"

View File

@@ -0,0 +1,41 @@
# Builds the Windows artifacts (node binaries).
# Usage: .\ci\build_windows_artifacts.ps1 [target]
# Targets supported:
# - x86_64-pc-windows-msvc
# - i686-pc-windows-msvc
function Prebuild-Rust {
    param (
        [string]$target
    )
    # Building here for the sake of easier debugging.
    Push-Location -Path "rust/ffi/node"
    Write-Host "Building rust library for $target"
    $env:RUST_BACKTRACE=1
    cargo build --release --target $target
    Pop-Location
}

function Build-NodeBinaries {
    param (
        [string]$target
    )
    Push-Location -Path "node"
    Write-Host "Building node library for $target"
    npm run build-release -- --target $target
    npm run pack-build -- --target $target
    Pop-Location
}

$targets = $args[0]
if (-not $targets) {
    $targets = "x86_64-pc-windows-msvc"
}

Write-Host "Building artifacts for targets: $targets"
foreach ($target in $targets) {
    Prebuild-Rust $target
    Build-NodeBinaries $target
}

View File

@@ -60,6 +60,9 @@ nav:
   - Python integrations:
     - Pandas and PyArrow: python/arrow.md
     - DuckDB: python/duckdb.md
+    - LangChain 🦜️🔗: https://python.langchain.com/en/latest/modules/indexes/vectorstores/examples/lancedb.html
+    - LlamaIndex 🦙: https://gpt-index.readthedocs.io/en/latest/examples/vector_stores/LanceDBIndexDemo.html
+    - Pydantic: python/pydantic.md
   - Python examples:
     - YouTube Transcript Search: notebooks/youtube_transcript_search.ipynb
     - Documentation QA Bot using LangChain: notebooks/code_qa_bot.ipynb
@@ -68,6 +71,7 @@ nav:
     - Serverless QA Bot with Modal: examples/serverless_qa_bot_with_modal_and_langchain.md
   - Javascript examples:
     - YouTube Transcript Search: examples/youtube_transcript_bot_with_nodejs.md
+    - TransformersJS Embedding Search: examples/transformerjs_embedding_search_nodejs.md
   - References:
     - Vector Search: search.md
     - SQL filters: sql.md

View File

@@ -46,7 +46,7 @@ You can also use an external API like OpenAI to generate embeddings
     def embed_func(c):
         rs = openai.Embedding.create(input=c, engine="text-embedding-ada-002")
         return [record["embedding"] for record in rs["data"]]
     ```
 
 === "Javascript"
@@ -126,7 +126,7 @@ belong in the same latent space and your results will be nonsensical.
 === "Javascript"
 
     ```javascript
     const results = await table
-        .search('What's the best pizza topping?')
+        .search("What's the best pizza topping?")
         .limit(10)
         .execute()
     ```

View File

@@ -0,0 +1,121 @@
# Vector embedding search using TransformersJS
## Embed and query data from LanceDB using TransformersJS
<img id="splash" width="400" alt="transformersjs" src="https://github.com/lancedb/lancedb/assets/43097991/88a31e30-3d6f-4eef-9216-4b7c688f1b4f">
This example shows how to use the [transformers.js](https://github.com/xenova/transformers.js) library to perform vector embedding search using LanceDB's Javascript API.
### Setting up
First, install the dependencies:
```bash
npm install vectordb
npm i @xenova/transformers
```
We will also be using the [all-MiniLM-L6-v2](https://huggingface.co/Xenova/all-MiniLM-L6-v2) model, which is compatible with Transformers.js.
Within our `index.js` file we will import the necessary libraries and define our model and database:
```javascript
const lancedb = require('vectordb')
const { pipeline } = await import('@xenova/transformers')
const pipe = await pipeline('feature-extraction', 'Xenova/all-MiniLM-L6-v2');
```
### Creating the embedding function
Next, we will create an embedding function. Given a batch of strings, it returns a vector embedding for each one, using the `pipe` function we defined earlier.
```javascript
// Define the function. `sourceColumn` is required for LanceDB to know
// which column to use as input.
const embed_fun = {}
embed_fun.sourceColumn = 'text'
embed_fun.embed = async function (batch) {
let result = []
// Given a batch of strings, we will use the `pipe` function to get
// the vector embedding of each string.
for (let text of batch) {
// 'mean' pooling and normalizing allows the embeddings to share the
// same length.
const res = await pipe(text, { pooling: 'mean', normalize: true })
result.push(Array.from(res['data']))
}
return (result)
}
```
### Creating the database
Now, we will create the LanceDB database and add the embedding function we defined earlier.
```javascript
// Link a folder and create a table with data
const db = await lancedb.connect('data/sample-lancedb')
// You can also import any other data, but make sure that you have a column
// for the embedding function to use.
const data = [
{ id: 1, text: 'Cherry', type: 'fruit' },
{ id: 2, text: 'Carrot', type: 'vegetable' },
{ id: 3, text: 'Potato', type: 'vegetable' },
{ id: 4, text: 'Apple', type: 'fruit' },
{ id: 5, text: 'Banana', type: 'fruit' }
]
// Create the table with the embedding function
const table = await db.createTable('food_table', data, "create", embed_fun)
```
### Performing the search
Now, we can perform the search using the `search` function. LanceDB automatically uses the embedding function we defined earlier to get the vector embedding of the query string.
```javascript
// Query the table
const results = await table
.search("a sweet fruit to eat")
.metricType("cosine")
.limit(2)
.execute()
console.log(results.map(r => r.text))
```
```bash
[ 'Banana', 'Cherry' ]
```
Output of `results`:
```bash
[
{
vector: Float32Array(384) [
-0.057455405592918396,
0.03617725893855095,
-0.0367760956287384,
... 381 more items
],
id: 5,
text: 'Banana',
type: 'fruit',
score: 0.4919965863227844
},
{
vector: Float32Array(384) [
0.0009714411571621895,
0.008223623037338257,
0.009571489877998829,
... 381 more items
],
id: 1,
text: 'Cherry',
type: 'fruit',
score: 0.5540297031402588
}
]
```
### Wrapping it up
In this example, we showed how to use the `transformers.js` library to perform vector embedding search using LanceDB's Javascript API. You can find the full code for this example on [Github](https://github.com/lancedb/lancedb/blob/main/node/examples/js-transformers/index.js)!

View File

@@ -5,6 +5,8 @@ Built on top of [Apache Arrow](https://arrow.apache.org/),
 `LanceDB` is easy to integrate with the Python ecosystem, including [Pandas](https://pandas.pydata.org/)
 and PyArrow.
 
+## Create dataset
+
 First, we need to connect to a `LanceDB` database.
 
 ```py
@@ -27,10 +29,42 @@ data = pd.DataFrame({
 table = db.create_table("pd_table", data=data)
 ```
 
-You will find detailed instructions of creating dataset and index in
-[Basic Operations](basic.md) and [Indexing](ann_indexes.md)
+Similar to [`pyarrow.write_dataset()`](https://arrow.apache.org/docs/python/generated/pyarrow.dataset.write_dataset.html),
+[db.create_table()](../python/#lancedb.db.DBConnection.create_table) accepts a wide range of data forms.
+For example, if you have a dataset that is larger than memory, you can create the table with an
+`Iterator[pyarrow.RecordBatch]` to lazily generate data:
+
+```py
+from typing import Iterable
+
+import pyarrow as pa
+import lancedb
+
+def make_batches() -> Iterable[pa.RecordBatch]:
+    for i in range(5):
+        yield pa.RecordBatch.from_arrays(
+            [
+                pa.array([[3.1, 4.1], [5.9, 26.5]]),
+                pa.array(["foo", "bar"]),
+                pa.array([10.0, 20.0]),
+            ],
+            ["vector", "item", "price"])
+
+schema = pa.schema([
+    pa.field("vector", pa.list_(pa.float32())),
+    pa.field("item", pa.utf8()),
+    pa.field("price", pa.float32()),
+])
+
+table = db.create_table("iterable_table", data=make_batches(), schema=schema)
+```
+
+You will find detailed instructions on creating datasets in the
+[Basic Operations](../basic.md) and [API](../python/#lancedb.db.DBConnection.create_table)
 sections.
 
+## Vector Search
+
 We can now perform similarity search via `LanceDB` Python API.
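
For reference, here is a minimal similarity-search sketch against the `iterable_table` created above. This is an illustrative addition rather than part of the diff, and it assumes the standard `search().limit().to_df()` query chain of the LanceDB Python API:

```py
# Query the table created above with a 2-d vector; to_df() collects
# the nearest rows into a pandas DataFrame.
results = table.search([3.0, 4.0]).limit(2).to_df()
print(results)
```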

View File

@@ -0,0 +1,35 @@
# Pydantic
[Pydantic](https://docs.pydantic.dev/latest/) is a data validation library in Python.
## Schema
LanceDB supports creating an Apache Arrow Schema from a
[Pydantic BaseModel](https://docs.pydantic.dev/latest/api/main/#pydantic.main.BaseModel)
via the [pydantic_to_schema()](python.md#lancedb.pydantic.pydantic_to_schema) method.
::: lancedb.pydantic.pydantic_to_schema
## Vector Field
LanceDB provides a [`vector(dim)`](python.md#lancedb.pydantic.vector) method to define a
vector field in a Pydantic model.
::: lancedb.pydantic.vector
## Type Conversion
LanceDB automatically converts Pydantic fields to an
[Apache Arrow DataType](https://arrow.apache.org/docs/python/generated/pyarrow.DataType.html#pyarrow.DataType).
Currently supported type conversions:
| Pydantic Field Type | PyArrow Data Type |
| ------------------- | ----------------- |
| `int` | `pyarrow.int64` |
| `float` | `pyarrow.float64` |
| `bool` | `pyarrow.bool_` |
| `str` | `pyarrow.utf8()` |
| `list` | `pyarrow.List` |
| `BaseModel` | `pyarrow.Struct` |
| `vector(n)` | `pyarrow.FixedSizeList(float32, n)` |
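
A short usage sketch tying `vector()` and `pydantic_to_schema()` together; it mirrors the docstring examples added to `lancedb/pydantic.py` later in this diff:

```py
import pydantic
import pyarrow as pa
from lancedb.pydantic import pydantic_to_schema, vector

class MyModel(pydantic.BaseModel):
    id: int
    url: str
    embeddings: vector(768)  # becomes a fixed-size list of 768 float32 values

schema = pydantic_to_schema(MyModel)
assert schema == pa.schema([
    pa.field("id", pa.int64(), False),
    pa.field("url", pa.utf8(), False),
    pa.field("embeddings", pa.list_(pa.float32(), 768), False),
])
```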

View File

@@ -46,10 +46,6 @@ pip install lancedb
 ## Utilities
 
-::: lancedb.schema.schema_to_dict
-
-::: lancedb.schema.dict_to_schema
-
 ::: lancedb.vector
 
 ## Integrations

View File

@@ -7,6 +7,7 @@ const excludedFiles = [
"../src/embedding.md", "../src/embedding.md",
"../src/examples/serverless_lancedb_with_s3_and_lambda.md", "../src/examples/serverless_lancedb_with_s3_and_lambda.md",
"../src/examples/serverless_qa_bot_with_modal_and_langchain.md", "../src/examples/serverless_qa_bot_with_modal_and_langchain.md",
"../src/examples/transformerjs_embedding_search_nodejs.md",
"../src/examples/youtube_transcript_bot_with_nodejs.md", "../src/examples/youtube_transcript_bot_with_nodejs.md",
]; ];
const nodePrefix = "javascript"; const nodePrefix = "javascript";

View File

@@ -0,0 +1,66 @@
// Copyright 2023 Lance Developers.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
'use strict'

async function example() {
  const lancedb = require('vectordb')

  // Import transformers and the all-MiniLM-L6-v2 model (https://huggingface.co/Xenova/all-MiniLM-L6-v2)
  const { pipeline } = await import('@xenova/transformers')
  const pipe = await pipeline('feature-extraction', 'Xenova/all-MiniLM-L6-v2');

  // Create embedding function from pipeline which returns a list of vectors from batch.
  // sourceColumn is the name of the column in the data to be embedded.
  //
  // Output of pipe is a Tensor { data: Float32Array(384) }, so filter for the vector.
  const embed_fun = {}
  embed_fun.sourceColumn = 'text'
  embed_fun.embed = async function (batch) {
    let result = []
    for (let text of batch) {
      const res = await pipe(text, { pooling: 'mean', normalize: true })
      result.push(Array.from(res['data']))
    }
    return (result)
  }

  // Link a folder and create a table with data
  const db = await lancedb.connect('data/sample-lancedb')
  const data = [
    { id: 1, text: 'Cherry', type: 'fruit' },
    { id: 2, text: 'Carrot', type: 'vegetable' },
    { id: 3, text: 'Potato', type: 'vegetable' },
    { id: 4, text: 'Apple', type: 'fruit' },
    { id: 5, text: 'Banana', type: 'fruit' }
  ]
  const table = await db.createTable('food_table', data, "create", embed_fun)

  // Query the table
  const results = await table
    .search("a sweet fruit to eat")
    .metricType("cosine")
    .limit(2)
    .execute()
  console.log(results.map(r => r.text))
}

example().then(_ => { console.log("Done!") })

View File

@@ -0,0 +1,16 @@
{
  "name": "vectordb-example-js-transformers",
  "version": "1.0.0",
  "description": "Example for using transformers.js with lancedb",
  "main": "index.js",
  "scripts": {
    "test": "echo \"Error: no test specified\" && exit 1"
  },
  "author": "Lance Devs",
  "license": "Apache-2.0",
  "dependencies": {
    "@xenova/transformers": "^2.4.1",
    "vectordb": "^0.1.12"
  }
}

node/package-lock.json (generated; 70 changed lines)
View File

@@ -1,12 +1,12 @@
 {
   "name": "vectordb",
-  "version": "0.1.12",
+  "version": "0.1.13",
   "lockfileVersion": 2,
   "requires": true,
   "packages": {
     "": {
       "name": "vectordb",
-      "version": "0.1.12",
+      "version": "0.1.13",
       "cpu": [
         "x64",
         "arm64"
@@ -14,7 +14,8 @@
       "license": "Apache-2.0",
       "os": [
         "darwin",
-        "linux"
+        "linux",
+        "win32"
       ],
       "dependencies": {
         "@apache-arrow/ts": "^12.0.0",
@@ -49,10 +50,11 @@
         "typescript": "*"
       },
       "optionalDependencies": {
-        "vectordb-darwin-arm64": "0.1.12",
-        "vectordb-darwin-x64": "0.1.12",
-        "vectordb-linux-arm64-gnu": "0.1.12",
-        "vectordb-linux-x64-gnu": "0.1.12"
+        "vectordb-darwin-arm64": "0.1.13",
+        "vectordb-darwin-x64": "0.1.13",
+        "vectordb-linux-arm64-gnu": "0.1.13",
+        "vectordb-linux-x64-gnu": "0.1.13",
+        "vectordb-win32-x64-msvc": "0.1.13"
       }
     },
     "node_modules/@apache-arrow/ts": {
@@ -4286,6 +4288,42 @@
       "integrity": "sha512-wa7YjyUGfNZngI/vtK0UHAN+lgDCxBPCylVXGp0zu59Fz5aiGtNXaq3DhIov063MorB+VfufLh3JlF2KdTK3xg==",
       "dev": true
     },
+    "node_modules/vectordb-darwin-arm64": {
+      "version": "0.1.13",
+      "resolved": "https://registry.npmjs.org/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.1.13.tgz",
+      "integrity": "sha512-9lLuX5P8m75EfP85pfC4LxO9J7Tzu4LngX55BVAdFe6qPRHu+iHmLw0QYYSVDqNm3GtDr2qFJlL2ILlsApyYyg==",
+      "cpu": [
+        "arm64"
+      ],
+      "optional": true,
+      "os": [
+        "darwin"
+      ]
+    },
+    "node_modules/vectordb-darwin-x64": {
+      "version": "0.1.13",
+      "resolved": "https://registry.npmjs.org/vectordb-darwin-x64/-/vectordb-darwin-x64-0.1.13.tgz",
+      "integrity": "sha512-5mkhBJlcfAqcty7Ww2csgYogq+b0NhtllAbag9IIznvqfcrvITU0H0vm5LGWbRuE/BUUxC25MJhm93YWBzqEVA==",
+      "cpu": [
+        "x64"
+      ],
+      "optional": true,
+      "os": [
+        "darwin"
+      ]
+    },
+    "node_modules/vectordb-linux-x64-gnu": {
+      "version": "0.1.13",
+      "resolved": "https://registry.npmjs.org/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.1.13.tgz",
+      "integrity": "sha512-fU+sIHUkXyMdrWjggT93p0blKD+pbgr+x01tn9d2/pbA1ePo2AwuE86rYPA+BjyCUE1QifPgKadzGVVpqWYmnQ==",
+      "cpu": [
+        "x64"
+      ],
+      "optional": true,
+      "os": [
+        "linux"
+      ]
+    },
     "node_modules/vscode-oniguruma": {
       "version": "1.7.0",
       "resolved": "https://registry.npmjs.org/vscode-oniguruma/-/vscode-oniguruma-1.7.0.tgz",
@@ -7581,6 +7619,24 @@
       "integrity": "sha512-wa7YjyUGfNZngI/vtK0UHAN+lgDCxBPCylVXGp0zu59Fz5aiGtNXaq3DhIov063MorB+VfufLh3JlF2KdTK3xg==",
       "dev": true
     },
+    "vectordb-darwin-arm64": {
+      "version": "0.1.13",
+      "resolved": "https://registry.npmjs.org/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.1.13.tgz",
+      "integrity": "sha512-9lLuX5P8m75EfP85pfC4LxO9J7Tzu4LngX55BVAdFe6qPRHu+iHmLw0QYYSVDqNm3GtDr2qFJlL2ILlsApyYyg==",
+      "optional": true
+    },
+    "vectordb-darwin-x64": {
+      "version": "0.1.13",
+      "resolved": "https://registry.npmjs.org/vectordb-darwin-x64/-/vectordb-darwin-x64-0.1.13.tgz",
+      "integrity": "sha512-5mkhBJlcfAqcty7Ww2csgYogq+b0NhtllAbag9IIznvqfcrvITU0H0vm5LGWbRuE/BUUxC25MJhm93YWBzqEVA==",
+      "optional": true
+    },
+    "vectordb-linux-x64-gnu": {
+      "version": "0.1.13",
+      "resolved": "https://registry.npmjs.org/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.1.13.tgz",
+      "integrity": "sha512-fU+sIHUkXyMdrWjggT93p0blKD+pbgr+x01tn9d2/pbA1ePo2AwuE86rYPA+BjyCUE1QifPgKadzGVVpqWYmnQ==",
+      "optional": true
+    },
     "vscode-oniguruma": {
       "version": "1.7.0",
       "resolved": "https://registry.npmjs.org/vscode-oniguruma/-/vscode-oniguruma-1.7.0.tgz",

View File

@@ -8,7 +8,7 @@
"tsc": "tsc -b", "tsc": "tsc -b",
"build": "cargo-cp-artifact --artifact cdylib vectordb-node index.node -- cargo build --message-format=json", "build": "cargo-cp-artifact --artifact cdylib vectordb-node index.node -- cargo build --message-format=json",
"build-release": "npm run build -- --release", "build-release": "npm run build -- --release",
"test": "npm run tsc; mocha -recursive dist/test", "test": "npm run tsc && mocha -recursive dist/test",
"lint": "eslint src --ext .js,.ts", "lint": "eslint src --ext .js,.ts",
"clean": "rm -rf node_modules *.node dist/", "clean": "rm -rf node_modules *.node dist/",
"pack-build": "neon pack-build", "pack-build": "neon pack-build",
@@ -60,7 +60,8 @@
}, },
"os": [ "os": [
"darwin", "darwin",
"linux" "linux",
"win32"
], ],
"cpu": [ "cpu": [
"x64", "x64",
@@ -71,13 +72,15 @@
"x86_64-apple-darwin": "vectordb-darwin-x64", "x86_64-apple-darwin": "vectordb-darwin-x64",
"aarch64-apple-darwin": "vectordb-darwin-arm64", "aarch64-apple-darwin": "vectordb-darwin-arm64",
"x86_64-unknown-linux-gnu": "vectordb-linux-x64-gnu", "x86_64-unknown-linux-gnu": "vectordb-linux-x64-gnu",
"aarch64-unknown-linux-gnu": "vectordb-linux-arm64-gnu" "aarch64-unknown-linux-gnu": "vectordb-linux-arm64-gnu",
"x86_64-pc-windows-msvc": "vectordb-win32-x64-msvc"
} }
}, },
"optionalDependencies": { "optionalDependencies": {
"vectordb-darwin-arm64": "0.1.13", "vectordb-darwin-arm64": "0.1.13",
"vectordb-darwin-x64": "0.1.13", "vectordb-darwin-x64": "0.1.13",
"vectordb-linux-arm64-gnu": "0.1.13",
"vectordb-linux-x64-gnu": "0.1.13", "vectordb-linux-x64-gnu": "0.1.13",
"vectordb-linux-arm64-gnu": "0.1.13" "vectordb-win32-x64-msvc": "0.1.13"
} }
} }

View File

@@ -13,11 +13,12 @@
 from __future__ import annotations
 
+import functools
 import os
 from abc import ABC, abstractmethod
 from pathlib import Path
+from typing import Dict, Iterable, List, Optional, Tuple, Union
 
+import pandas as pd
 import pyarrow as pa
 from pyarrow import fs
@@ -38,8 +39,10 @@ class DBConnection(ABC):
     def create_table(
         self,
         name: str,
-        data: DATA = None,
-        schema: pa.Schema = None,
+        data: Optional[
+            Union[List[dict], dict, pd.DataFrame, pa.Table, Iterable[pa.RecordBatch]],
+        ] = None,
+        schema: Optional[pa.Schema] = None,
         mode: str = "create",
         on_bad_vectors: str = "error",
         fill_value: float = 0.0,
@@ -51,7 +54,7 @@ class DBConnection(ABC):
         name: str
             The name of the table.
         data: list, tuple, dict, pd.DataFrame; optional
-            The data to insert into the table.
+            The data to initialize the table. User must provide at least one of `data` or `schema`.
         schema: pyarrow.Schema; optional
             The schema of the table.
         mode: str; default "create"
@@ -64,16 +67,16 @@ class DBConnection(ABC):
         fill_value: float
             The value to use when filling vectors. Only used if on_bad_vectors="fill".
 
-        Note
-        ----
-        The vector index won't be created by default.
-        To create the index, call the `create_index` method on the table.
-
         Returns
         -------
         LanceTable
             A reference to the newly created table.
 
+        !!! note
+            The vector index won't be created by default.
+            To create the index, call the `create_index` method on the table.
+
         Examples
         --------
@@ -119,7 +122,7 @@ class DBConnection(ABC):
         Data is converted to Arrow before being written to disk. For maximum
         control over how data is saved, either provide the PyArrow schema to
-        convert to or else provide a PyArrow table directly.
+        convert to or else provide a [PyArrow Table](pyarrow.Table) directly.
 
         >>> custom_schema = pa.schema([
         ...     pa.field("vector", pa.list_(pa.float32(), 2)),
@@ -138,6 +141,30 @@ class DBConnection(ABC):
         vector: [[[1.1,1.2],[0.2,1.8]]]
         lat: [[45.5,40.1]]
         long: [[-122.7,-74.1]]
+
+        It is also possible to create a table from `Iterable[pa.RecordBatch]`:
+
+        >>> import pyarrow as pa
+        >>> def make_batches():
+        ...     for i in range(5):
+        ...         yield pa.RecordBatch.from_arrays(
+        ...             [
+        ...                 pa.array([[3.1, 4.1], [5.9, 26.5]]),
+        ...                 pa.array(["foo", "bar"]),
+        ...                 pa.array([10.0, 20.0]),
+        ...             ],
+        ...             ["vector", "item", "price"],
+        ...         )
+        >>> schema = pa.schema([
+        ...     pa.field("vector", pa.list_(pa.float32())),
+        ...     pa.field("item", pa.utf8()),
+        ...     pa.field("price", pa.float32()),
+        ... ])
+        >>> db.create_table("table4", make_batches(), schema=schema)
+        LanceTable(table4)
         """
         raise NotImplementedError
@@ -252,7 +279,7 @@ class LanceDBConnection(DBConnection):
     def create_table(
         self,
         name: str,
-        data: DATA = None,
+        data: Optional[Union[List[dict], dict, pd.DataFrame]] = None,
         schema: pa.Schema = None,
         mode: str = "create",
         on_bad_vectors: str = "error",
@@ -260,114 +287,22 @@ class LanceDBConnection(DBConnection):
     ) -> LanceTable:
         """Create a table in the database.
 
-        Parameters
-        ----------
-        name: str
-            The name of the table.
-        data: list, tuple, dict, pd.DataFrame; optional
-            The data to insert into the table.
-        schema: pyarrow.Schema; optional
-            The schema of the table.
-        mode: str; default "create"
-            The mode to use when creating the table. Can be either "create" or "overwrite".
-            By default, if the table already exists, an exception is raised.
-            If you want to overwrite the table, use mode="overwrite".
-        on_bad_vectors: str, default "error"
-            What to do if any of the vectors are not the same size or contains NaNs.
-            One of "error", "drop", "fill".
-        fill_value: float
-            The value to use when filling vectors. Only used if on_bad_vectors="fill".
-
-        Note
-        ----
-        The vector index won't be created by default.
-        To create the index, call the `create_index` method on the table.
-
-        Returns
-        -------
-        LanceTable
-            A reference to the newly created table.
-
-        Examples
-        --------
-
-        Can create with list of tuples or dictionaries:
-
-        >>> import lancedb
-        >>> db = lancedb.connect("./.lancedb")
-        >>> data = [{"vector": [1.1, 1.2], "lat": 45.5, "long": -122.7},
-        ...         {"vector": [0.2, 1.8], "lat": 40.1, "long": -74.1}]
-        >>> db.create_table("my_table", data)
-        LanceTable(my_table)
-        >>> db["my_table"].head()
-        pyarrow.Table
-        vector: fixed_size_list<item: float>[2]
-          child 0, item: float
-        lat: double
-        long: double
-        ----
-        vector: [[[1.1,1.2],[0.2,1.8]]]
-        lat: [[45.5,40.1]]
-        long: [[-122.7,-74.1]]
-
-        You can also pass a pandas DataFrame:
-
-        >>> import pandas as pd
-        >>> data = pd.DataFrame({
-        ...     "vector": [[1.1, 1.2], [0.2, 1.8]],
-        ...     "lat": [45.5, 40.1],
-        ...     "long": [-122.7, -74.1]
-        ... })
-        >>> db.create_table("table2", data)
-        LanceTable(table2)
-        >>> db["table2"].head()
-        pyarrow.Table
-        vector: fixed_size_list<item: float>[2]
-          child 0, item: float
-        lat: double
-        long: double
-        ----
-        vector: [[[1.1,1.2],[0.2,1.8]]]
-        lat: [[45.5,40.1]]
-        long: [[-122.7,-74.1]]
-
-        Data is converted to Arrow before being written to disk. For maximum
-        control over how data is saved, either provide the PyArrow schema to
-        convert to or else provide a PyArrow table directly.
-
-        >>> custom_schema = pa.schema([
-        ...     pa.field("vector", pa.list_(pa.float32(), 2)),
-        ...     pa.field("lat", pa.float32()),
-        ...     pa.field("long", pa.float32())
-        ... ])
-        >>> db.create_table("table3", data, schema = custom_schema)
-        LanceTable(table3)
-        >>> db["table3"].head()
-        pyarrow.Table
-        vector: fixed_size_list<item: float>[2]
-          child 0, item: float
-        lat: float
-        long: float
-        ----
-        vector: [[[1.1,1.2],[0.2,1.8]]]
-        lat: [[45.5,40.1]]
-        long: [[-122.7,-74.1]]
+        See
+        ---
+        DBConnection.create_table
         """
         if mode.lower() not in ["create", "overwrite"]:
             raise ValueError("mode must be either 'create' or 'overwrite'")
-        if data is not None:
-            tbl = LanceTable.create(
-                self,
-                name,
-                data,
-                schema,
-                mode=mode,
-                on_bad_vectors=on_bad_vectors,
-                fill_value=fill_value,
-            )
-        else:
-            tbl = LanceTable.open(self, name)
+        tbl = LanceTable.create(
+            self,
+            name,
+            data,
+            schema,
+            mode=mode,
+            on_bad_vectors=on_bad_vectors,
+            fill_value=fill_value,
+        )
         return tbl
 
     def open_table(self, name: str) -> LanceTable:
def open_table(self, name: str) -> LanceTable: def open_table(self, name: str) -> LanceTable:

View File

@@ -18,7 +18,7 @@ from __future__ import annotations
 import inspect
 import sys
 import types
-from abc import ABC, abstractstaticmethod
+from abc import ABC, abstractmethod
 from typing import Any, List, Type, Union, _GenericAlias
 
 import pyarrow as pa
@@ -27,11 +27,13 @@ from pydantic_core import CoreSchema, core_schema
 
 class FixedSizeListMixin(ABC):
-    @abstractstaticmethod
+    @staticmethod
+    @abstractmethod
     def dim() -> int:
         raise NotImplementedError
 
-    @abstractstaticmethod
+    @staticmethod
+    @abstractmethod
     def value_arrow_type() -> pa.DataType:
         raise NotImplementedError
@@ -41,9 +43,15 @@ def vector(
 ) -> Type[FixedSizeListMixin]:
     """Pydantic Vector Type.
 
-    Note
-    ----
-    Experimental feature.
+    !!! warning
+        Experimental feature.
+
+    Parameters
+    ----------
+    dim : int
+        The dimension of the vector.
+    value_type : pyarrow.DataType, optional
+        The value type of the vector, by default pa.float32()
 
     Examples
     --------
@@ -52,9 +60,15 @@ def vector(
     >>> from lancedb.pydantic import vector
     ...
     >>> class MyModel(pydantic.BaseModel):
-    ...     vector: vector(756)
     ...     id: int
-    ...     description: str
+    ...     url: str
+    ...     embeddings: vector(768)
+    >>> schema = pydantic_to_schema(MyModel)
+    >>> assert schema == pa.schema([
+    ...     pa.field("id", pa.int64(), False),
+    ...     pa.field("url", pa.utf8(), False),
+    ...     pa.field("embeddings", pa.list_(pa.float32(), 768), False)
+    ... ])
     """
     # TODO: make a public parameterized type.
@@ -163,7 +177,36 @@ def pydantic_to_schema(model: Type[pydantic.BaseModel]) -> pa.Schema:
     Returns
     -------
-    A PyArrow Schema.
+    pyarrow.Schema
+
+    Examples
+    --------
+    >>> from typing import List, Optional
+    >>> import pydantic
+    >>> from lancedb.pydantic import pydantic_to_schema
+    ...
+    >>> class InnerModel(pydantic.BaseModel):
+    ...     a: str
+    ...     b: Optional[float]
+    >>>
+    >>> class FooModel(pydantic.BaseModel):
+    ...     id: int
+    ...     s: Optional[str] = None
+    ...     vec: List[float]
+    ...     li: List[int]
+    ...     inner: InnerModel
+    >>> schema = pydantic_to_schema(FooModel)
+    >>> assert schema == pa.schema([
+    ...     pa.field("id", pa.int64(), False),
+    ...     pa.field("s", pa.utf8(), True),
+    ...     pa.field("vec", pa.list_(pa.float64()), False),
+    ...     pa.field("li", pa.list_(pa.int64()), False),
+    ...     pa.field("inner", pa.struct([
+    ...         pa.field("a", pa.utf8(), False),
+    ...         pa.field("b", pa.float64(), True),
+    ...     ]), False),
+    ... ])
     """
     fields = _pydantic_model_to_fields(model)
     return pa.schema(fields)

View File

@@ -226,6 +226,7 @@ class LanceQueryBuilder:
             columns=self._columns,
             nprobes=self._nprobes,
             refine_factor=self._refine_factor,
+            vector_column=self._vector_column,
         )
         return self._table._execute_query(query)

View File

@@ -0,0 +1,22 @@
# Copyright 2023 LanceDB Developers
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import pyarrow as pa


def to_ipc_binary(table: pa.Table) -> bytes:
    """Serialize a PyArrow Table to IPC binary."""
    sink = pa.BufferOutputStream()
    with pa.ipc.new_stream(sink, table.schema) as writer:
        writer.write_table(table)
    return sink.getvalue().to_pybytes()
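
As a companion sketch (an editorial addition, not part of the diff), the payload produced by `to_ipc_binary` can be read back with PyArrow's IPC stream reader:

```py
import pyarrow as pa

def from_ipc_binary(data: bytes) -> pa.Table:
    """Deserialize IPC stream bytes back into a PyArrow Table."""
    reader = pa.ipc.open_stream(data)
    return reader.read_all()
```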

View File

@@ -13,7 +13,7 @@
 import functools
-from typing import Any, Callable, Dict, Union
+from typing import Any, Callable, Dict, Optional, Union
 
 import aiohttp
 import attr
@@ -24,6 +24,8 @@ from lancedb.common import Credential
 from lancedb.remote import VectorQuery, VectorQueryResult
 from lancedb.remote.errors import LanceDBClientError
 
+ARROW_STREAM_CONTENT_TYPE = "application/vnd.apache.arrow.stream"
+
 
 def _check_not_closed(f):
     @functools.wraps(f)
@@ -59,9 +61,12 @@ class RestfulLanceDBClient:
     @functools.cached_property
     def headers(self) -> Dict[str, str]:
-        return {
+        headers = {
             "x-api-key": self.api_key,
         }
+        if self.region == "local":  # Local test mode
+            headers["Host"] = f"{self.db_name}.{self.region}.api.lancedb.com"
+        return headers
 
     @staticmethod
     async def _check_status(resp: aiohttp.ClientResponse):
@@ -93,7 +98,9 @@ class RestfulLanceDBClient:
     async def post(
         self,
         uri: str,
-        data: Union[Dict[str, Any], BaseModel],
+        data: Union[Dict[str, Any], BaseModel, bytes],
+        params: Optional[Dict[str, Any]] = None,
+        content_type: Optional[str] = None,
         deserialize: Callable = lambda resp: resp.json(),
     ) -> Dict[str, Any]:
         """Send a POST request and returns the deserialized response payload.
@@ -107,10 +114,19 @@ class RestfulLanceDBClient:
         """
         if isinstance(data, BaseModel):
             data: Dict[str, Any] = data.dict(exclude_none=True)
+        if isinstance(data, bytes):
+            req_kwargs = {"data": data}
+        else:
+            req_kwargs = {"json": data}
+
+        headers = self.headers.copy()
+        if content_type is not None:
+            headers["content-type"] = content_type
         async with self.session.post(
             uri,
-            json=data,
-            headers=self.headers,
+            headers=headers,
+            params=params,
+            **req_kwargs,
         ) as resp:
             resp: aiohttp.ClientResponse = resp
             await self._check_status(resp)
@@ -119,11 +135,11 @@ class RestfulLanceDBClient:
     @_check_not_closed
     async def list_tables(self):
         """List all tables in the database."""
-        json = await self.get("/1/table/", {})
+        json = await self.get("/v1/table/", {})
         return json["tables"]
 
     @_check_not_closed
     async def query(self, table_name: str, query: VectorQuery) -> VectorQueryResult:
         """Query a table."""
-        tbl = await self.post(f"/1/table/{table_name}/", query, deserialize=_read_ipc)
+        tbl = await self.post(f"/v1/table/{table_name}/", query, deserialize=_read_ipc)
         return VectorQueryResult(tbl)

View File

@@ -12,6 +12,7 @@
 # limitations under the License.
 
 import asyncio
+import uuid
 from typing import List
 from urllib.parse import urlparse
@@ -19,9 +20,11 @@ import pyarrow as pa
 from lancedb.common import DATA
 from lancedb.db import DBConnection
-from lancedb.table import Table
+from lancedb.schema import schema_to_json
+from lancedb.table import Table, _sanitize_data
 
-from .client import RestfulLanceDBClient
+from .arrow import to_ipc_binary
+from .client import ARROW_STREAM_CONTENT_TYPE, RestfulLanceDBClient
 
 
 class RemoteDBConnection(DBConnection):
@@ -71,8 +74,31 @@ class RemoteDBConnection(DBConnection):
         name: str,
         data: DATA = None,
         schema: pa.Schema = None,
+        mode: str = "create",
         on_bad_vectors: str = "error",
         fill_value: float = 0.0,
     ) -> Table:
-        raise NotImplementedError
+        if data is None and schema is None:
+            raise ValueError("Either data or schema must be provided.")
+        if data is not None:
+            data = _sanitize_data(
+                data, schema, on_bad_vectors=on_bad_vectors, fill_value=fill_value
+            )
+        else:
+            if schema is None:
+                raise ValueError("Either data or schema must be provided")
+            data = pa.Table.from_pylist([], schema=schema)
+
+        from .table import RemoteTable
+
+        data = to_ipc_binary(data)
+        request_id = uuid.uuid4().hex
+
+        self._loop.run_until_complete(
+            self._client.post(
+                f"/v1/table/{name}/create",
+                data=data,
+                params={"request_id": request_id},
+                content_type=ARROW_STREAM_CONTENT_TYPE,
+            )
+        )
+        return RemoteTable(self, name)

View File

@@ -11,6 +11,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import uuid
+from functools import cached_property
 from typing import Union
 
 import pyarrow as pa
@@ -18,7 +20,10 @@ import pyarrow as pa
 from lancedb.common import DATA, VEC, VECTOR_COLUMN_NAME
 
 from ..query import LanceQueryBuilder, Query
-from ..table import Query, Table
+from ..schema import json_to_schema
+from ..table import Query, Table, _sanitize_data
+from .arrow import to_ipc_binary
+from .client import ARROW_STREAM_CONTENT_TYPE
 from .db import RemoteDBConnection
@@ -30,8 +35,14 @@ class RemoteTable(Table):
     def __repr__(self) -> str:
         return f"RemoteTable({self._conn.db_name}.{self.name})"
 
+    @cached_property
     def schema(self) -> pa.Schema:
-        raise NotImplementedError
+        """Return the schema of the table."""
+        resp = self._conn._loop.run_until_complete(
+            self._conn._client.get(f"/v1/table/{self._name}/describe")
+        )
+        schema = json_to_schema(resp["schema"])
+        return schema
 
     def to_arrow(self) -> pa.Table:
         raise NotImplementedError
@@ -53,7 +64,22 @@ class RemoteTable(Table):
         on_bad_vectors: str = "error",
         fill_value: float = 0.0,
     ) -> int:
-        raise NotImplementedError
+        data = _sanitize_data(
+            data, self.schema, on_bad_vectors=on_bad_vectors, fill_value=fill_value
+        )
+        payload = to_ipc_binary(data)
+
+        request_id = uuid.uuid4().hex
+
+        self._conn._loop.run_until_complete(
+            self._conn._client.post(
+                f"/v1/table/{self._name}/insert",
+                data=payload,
+                params={"request_id": request_id, "mode": mode},
+                content_type=ARROW_STREAM_CONTENT_TYPE,
+            )
+        )
+        return len(data)
 
     def search(
         self, query: Union[VEC, str], vector_column: str = VECTOR_COLUMN_NAME

View File

@@ -13,10 +13,10 @@
"""Schema related utilities.""" """Schema related utilities."""
import json
from typing import Any, Dict, Type from typing import Any, Dict, Type
import pyarrow as pa import pyarrow as pa
from lance import json_to_schema, schema_to_json
def vector(dimension: int, value_type: pa.DataType = pa.float32()) -> pa.DataType: def vector(dimension: int, value_type: pa.DataType = pa.float32()) -> pa.DataType:
@@ -43,247 +43,3 @@ def vector(dimension: int, value_type: pa.DataType = pa.float32()) -> pa.DataTyp
... ]) ... ])
""" """
return pa.list_(value_type, dimension) return pa.list_(value_type, dimension)
def _type_to_dict(dt: pa.DataType) -> Dict[str, Any]:
if pa.types.is_boolean(dt):
return {"type": "boolean"}
elif pa.types.is_int8(dt):
return {"type": "int8"}
elif pa.types.is_int16(dt):
return {"type": "int16"}
elif pa.types.is_int32(dt):
return {"type": "int32"}
elif pa.types.is_int64(dt):
return {"type": "int64"}
elif pa.types.is_uint8(dt):
return {"type": "uint8"}
elif pa.types.is_uint16(dt):
return {"type": "uint16"}
elif pa.types.is_uint32(dt):
return {"type": "uint32"}
elif pa.types.is_uint64(dt):
return {"type": "uint64"}
elif pa.types.is_float16(dt):
return {"type": "float16"}
elif pa.types.is_float32(dt):
return {"type": "float32"}
elif pa.types.is_float64(dt):
return {"type": "float64"}
elif pa.types.is_date32(dt):
return {"type": f"date32"}
elif pa.types.is_date64(dt):
return {"type": f"date64"}
elif pa.types.is_time32(dt):
return {"type": f"time32:{dt.unit}"}
elif pa.types.is_time64(dt):
return {"type": f"time64:{dt.unit}"}
elif pa.types.is_timestamp(dt):
return {"type": f"timestamp:{dt.unit}:{dt.tz if dt.tz is not None else ''}"}
elif pa.types.is_string(dt):
return {"type": "string"}
elif pa.types.is_binary(dt):
return {"type": "binary"}
elif pa.types.is_large_string(dt):
return {"type": "large_string"}
elif pa.types.is_large_binary(dt):
return {"type": "large_binary"}
elif pa.types.is_fixed_size_binary(dt):
return {"type": "fixed_size_binary", "width": dt.byte_width}
elif pa.types.is_fixed_size_list(dt):
return {
"type": "fixed_size_list",
"width": dt.list_size,
"value_type": _type_to_dict(dt.value_type),
}
elif pa.types.is_list(dt):
return {
"type": "list",
"value_type": _type_to_dict(dt.value_type),
}
elif pa.types.is_struct(dt):
return {
"type": "struct",
"fields": [_field_to_dict(dt.field(i)) for i in range(dt.num_fields)],
}
elif pa.types.is_dictionary(dt):
return {
"type": "dictionary",
"index_type": _type_to_dict(dt.index_type),
"value_type": _type_to_dict(dt.value_type),
}
# TODO: support extension types
raise TypeError(f"Unsupported type: {dt}")
def _field_to_dict(field: pa.field) -> Dict[str, Any]:
ret = {
"name": field.name,
"type": _type_to_dict(field.type),
"nullable": field.nullable,
}
if field.metadata is not None:
ret["metadata"] = field.metadata
return ret
def schema_to_dict(schema: pa.Schema) -> Dict[str, Any]:
"""Convert a PyArrow [Schema](pyarrow.Schema) to a dictionary.
Parameters
----------
schema : pa.Schema
The PyArrow Schema to convert
Returns
-------
A dict of the data type.
Examples
--------
>>> import pyarrow as pa
>>> import lancedb
>>> schema = pa.schema(
... [
... pa.field("id", pa.int64()),
... pa.field("vector", lancedb.vector(512), nullable=False),
... pa.field(
... "struct",
... pa.struct(
... [
... pa.field("a", pa.utf8()),
... pa.field("b", pa.float32()),
... ]
... ),
... True,
... ),
... ],
... metadata={"key": "value"},
... )
>>> json_schema = schema_to_dict(schema)
>>> assert json_schema == {
... "fields": [
... {"name": "id", "type": {"type": "int64"}, "nullable": True},
... {
... "name": "vector",
... "type": {
... "type": "fixed_size_list",
... "value_type": {"type": "float32"},
... "width": 512,
... },
... "nullable": False,
... },
... {
... "name": "struct",
... "type": {
... "type": "struct",
... "fields": [
... {"name": "a", "type": {"type": "string"}, "nullable": True},
... {"name": "b", "type": {"type": "float32"}, "nullable": True},
... ],
... },
... "nullable": True,
... },
... ],
... "metadata": {"key": "value"},
... }
"""
fields = []
for name in schema.names:
field = schema.field(name)
fields.append(_field_to_dict(field))
json_schema = {
"fields": fields,
"metadata": {
k.decode("utf-8"): v.decode("utf-8") for (k, v) in schema.metadata.items()
}
if schema.metadata is not None
else {},
}
return json_schema
def _dict_to_type(dt: Dict[str, Any]) -> pa.DataType:
type_name = dt["type"]
try:
return {
"boolean": pa.bool_(),
"int8": pa.int8(),
"int16": pa.int16(),
"int32": pa.int32(),
"int64": pa.int64(),
"uint8": pa.uint8(),
"uint16": pa.uint16(),
"uint32": pa.uint32(),
"uint64": pa.uint64(),
"float16": pa.float16(),
"float32": pa.float32(),
"float64": pa.float64(),
"string": pa.string(),
"binary": pa.binary(),
"large_string": pa.large_string(),
"large_binary": pa.large_binary(),
"date32": pa.date32(),
"date64": pa.date64(),
}[type_name]
except KeyError:
pass
if type_name == "fixed_size_binary":
return pa.binary(dt["width"])
elif type_name == "fixed_size_list":
return pa.list_(_dict_to_type(dt["value_type"]), dt["width"])
elif type_name == "list":
return pa.list_(_dict_to_type(dt["value_type"]))
elif type_name == "struct":
fields = []
for field in dt["fields"]:
fields.append(_dict_to_field(field))
return pa.struct(fields)
elif type_name == "dictionary":
return pa.dictionary(
_dict_to_type(dt["index_type"]), _dict_to_type(dt["value_type"])
)
elif type_name.startswith("time32:"):
return pa.time32(type_name.split(":")[1])
elif type_name.startswith("time64:"):
return pa.time64(type_name.split(":")[1])
elif type_name.startswith("timestamp:"):
fields = type_name.split(":")
unit = fields[1]
tz = fields[2] if len(fields) > 2 else None
return pa.timestamp(unit, tz)
raise TypeError(f"Unsupported type: {dt}")
def _dict_to_field(field: Dict[str, Any]) -> pa.Field:
name = field["name"]
nullable = field["nullable"] if "nullable" in field else True
dt = _dict_to_type(field["type"])
metadata = field.get("metadata", None)
return pa.field(name, dt, nullable, metadata)
def dict_to_schema(json: Dict[str, Any]) -> pa.Schema:
"""Reconstruct a PyArrow Schema from a JSON dict.
Parameters
----------
json : Dict[str, Any]
The JSON dict to reconstruct Schema from.
Returns
-------
A PyArrow Schema.
"""
fields = []
for field in json["fields"]:
fields.append(_dict_to_field(field))
metadata = {
k.encode("utf-8"): v.encode("utf-8")
for (k, v) in json.get("metadata", {}).items()
}
return pa.schema(fields, metadata)
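
The hand-rolled converters removed above are superseded by `schema_to_json` and `json_to_schema` from the `lance` package, imported at the top of this file. A small round-trip sketch under that assumption, with signatures inferred from their usage in this diff:

```py
import pyarrow as pa
from lance import json_to_schema, schema_to_json

schema = pa.schema([
    pa.field("id", pa.int64()),
    pa.field("vector", pa.list_(pa.float32(), 512), nullable=False),
])

# Serialize to a JSON-compatible dict and reconstruct; the round trip
# should preserve the schema.
assert json_to_schema(schema_to_json(schema)) == schema
```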

View File

@@ -16,7 +16,7 @@ from __future__ import annotations
 import os
 from abc import ABC, abstractmethod
 from functools import cached_property
-from typing import List, Union
+from typing import Iterable, List, Union
 
 import lance
 import numpy as np
@@ -44,7 +44,7 @@ def _sanitize_data(data, schema, on_bad_vectors, fill_value):
         data = _sanitize_schema(
             data, schema=schema, on_bad_vectors=on_bad_vectors, fill_value=fill_value
         )
-    if not isinstance(data, pa.Table):
+    if not isinstance(data, (pa.Table, Iterable)):
         raise TypeError(f"Unsupported data type: {type(data)}")
     return data
@@ -483,7 +483,7 @@ class LanceTable(Table):
         if schema is None:
             raise ValueError("Either data or schema must be provided")
         data = pa.Table.from_pylist([], schema=schema)
-        lance.write_dataset(data, tbl._dataset_uri, mode=mode)
+        lance.write_dataset(data, tbl._dataset_uri, schema=schema, mode=mode)
         return LanceTable(db, name)
 
     @classmethod
View File

@@ -1,7 +1,7 @@
 [project]
 name = "lancedb"
-version = "0.1.10"
-dependencies = ["pylance~=0.5.0", "ratelimiter", "retry", "tqdm", "aiohttp", "pydantic>=2", "attr"]
+version = "0.1.11"
+dependencies = ["pylance~=0.5.8", "ratelimiter", "retry", "tqdm", "aiohttp", "pydantic>=2", "attr"]
 description = "lancedb"
 authors = [
     { name = "LanceDB Devs", email = "dev@lancedb.com" },

View File

@@ -13,6 +13,7 @@
 import numpy as np
 import pandas as pd
+import pyarrow as pa
 import pytest
 
 import lancedb
@@ -75,6 +76,32 @@ def test_ingest_pd(tmp_path):
     assert db.open_table("test").name == db["test"].name
 
 
+def test_ingest_record_batch_iterator(tmp_path):
+    def batch_reader():
+        for i in range(5):
+            yield pa.RecordBatch.from_arrays(
+                [
+                    pa.array([[3.1, 4.1], [5.9, 26.5]]),
+                    pa.array(["foo", "bar"]),
+                    pa.array([10.0, 20.0]),
+                ],
+                ["vector", "item", "price"],
+            )
+
+    db = lancedb.connect(tmp_path)
+    tbl = db.create_table(
+        "test",
+        batch_reader(),
+        schema=pa.schema(
+            [
+                pa.field("vector", pa.list_(pa.float32())),
+                pa.field("item", pa.utf8()),
+                pa.field("price", pa.float32()),
+            ]
+        ),
+    )
+
+
 def test_create_mode(tmp_path):
     db = lancedb.connect(tmp_path)
     data = pd.DataFrame(
@@ -131,6 +158,9 @@ def test_empty_or_nonexistent_table(tmp_path):
     with pytest.raises(Exception):
         db.open_table("does_not_exist")
 
+    schema = pa.schema([pa.field("a", pa.int32())])
+    db.create_table("test", schema=schema)
+
 
 def test_replace_index(tmp_path):
     db = lancedb.connect(uri=tmp_path)

View File

@@ -119,6 +119,7 @@ def test_query_builder_with_different_vector_column():
columns=["b"], columns=["b"],
nprobes=20, nprobes=20,
refine_factor=None, refine_factor=None,
vector_column="foo_vector",
) )
) )

View File

@@ -1,109 +0,0 @@
-# Copyright 2023 LanceDB Developers
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import pyarrow as pa
-
-import lancedb
-from lancedb.schema import dict_to_schema, schema_to_dict
-
-
-def test_schema_to_dict():
-    schema = pa.schema(
-        [
-            pa.field("id", pa.int64()),
-            pa.field("vector", lancedb.vector(512), nullable=False),
-            pa.field(
-                "struct",
-                pa.struct(
-                    [
-                        pa.field("a", pa.utf8()),
-                        pa.field("b", pa.float32()),
-                    ]
-                ),
-                True,
-            ),
-            pa.field("d", pa.dictionary(pa.int64(), pa.utf8()), False),
-        ],
-        metadata={"key": "value"},
-    )
-    json_schema = schema_to_dict(schema)
-    assert json_schema == {
-        "fields": [
-            {"name": "id", "type": {"type": "int64"}, "nullable": True},
-            {
-                "name": "vector",
-                "type": {
-                    "type": "fixed_size_list",
-                    "value_type": {"type": "float32"},
-                    "width": 512,
-                },
-                "nullable": False,
-            },
-            {
-                "name": "struct",
-                "type": {
-                    "type": "struct",
-                    "fields": [
-                        {"name": "a", "type": {"type": "string"}, "nullable": True},
-                        {"name": "b", "type": {"type": "float32"}, "nullable": True},
-                    ],
-                },
-                "nullable": True,
-            },
-            {
-                "name": "d",
-                "type": {
-                    "type": "dictionary",
-                    "index_type": {"type": "int64"},
-                    "value_type": {"type": "string"},
-                },
-                "nullable": False,
-            },
-        ],
-        "metadata": {"key": "value"},
-    }
-
-    actual_schema = dict_to_schema(json_schema)
-    assert actual_schema == schema
-
-
-def test_temporal_types():
-    schema = pa.schema(
-        [
-            pa.field("t32", pa.time32("s")),
-            pa.field("t32ms", pa.time32("ms")),
-            pa.field("t64", pa.time64("ns")),
-            pa.field("ts", pa.timestamp("s")),
-            pa.field("ts_us_tz", pa.timestamp("us", tz="America/New_York")),
-        ],
-    )
-    json_schema = schema_to_dict(schema)
-    assert json_schema == {
-        "fields": [
-            {"name": "t32", "type": {"type": "time32:s"}, "nullable": True},
-            {"name": "t32ms", "type": {"type": "time32:ms"}, "nullable": True},
-            {"name": "t64", "type": {"type": "time64:ns"}, "nullable": True},
-            {"name": "ts", "type": {"type": "timestamp:s:"}, "nullable": True},
-            {
-                "name": "ts_us_tz",
-                "type": {"type": "timestamp:us:America/New_York"},
-                "nullable": True,
-            },
-        ],
-        "metadata": {},
-    }
-
-    actual_schema = dict_to_schema(json_schema)
-    assert actual_schema == schema

View File

@@ -15,6 +15,7 @@ arrow-ipc = { workspace = true }
 arrow-schema = { workspace = true }
 once_cell = "1"
 futures = "0.3"
+half = { workspace = true }
 lance = { workspace = true }
 vectordb = { path = "../../vectordb" }
 tokio = { version = "1.23", features = ["rt-multi-thread"] }

View File

@@ -13,6 +13,7 @@ arrow-data = { workspace = true }
 arrow-schema = { workspace = true }
 object_store = { workspace = true }
 snafu = "0.7.4"
+half = { workspace = true }
 lance = { workspace = true }
 tokio = { version = "1.23", features = ["rt-multi-thread"] }

View File

@@ -27,6 +27,7 @@ pub struct Database {
     object_store: ObjectStore,
 
     pub(crate) uri: String,
+    pub(crate) base_path: object_store::path::Path,
 }
 
 const LANCE_EXTENSION: &str = "lance";
@@ -43,12 +44,13 @@ impl Database {
     ///
     /// * A [Database] object.
     pub async fn connect(uri: &str) -> Result<Database> {
-        let (object_store, _) = ObjectStore::from_uri(uri).await?;
+        let (object_store, base_path) = ObjectStore::from_uri(uri).await?;
         if object_store.is_local() {
             Self::try_create_dir(uri).context(CreateDirSnafu { path: uri })?;
         }
         Ok(Database {
             uri: uri.to_string(),
+            base_path,
             object_store,
         })
     }
@@ -70,7 +72,7 @@ impl Database {
     pub async fn table_names(&self) -> Result<Vec<String>> {
         let f = self
             .object_store
-            .read_dir(self.uri.as_str())
+            .read_dir(self.base_path.clone())
             .await?
             .iter()
             .map(|fname| Path::new(fname))
@@ -141,8 +143,9 @@ impl Database {
     /// # Arguments
     /// * `name` - The name of the table.
     pub async fn drop_table(&self, name: &str) -> Result<()> {
-        let dir_name = format!("{}/{}.{}", self.uri, name, LANCE_EXTENSION);
-        self.object_store.remove_dir_all(dir_name).await?;
+        let dir_name = format!("{}.{}", name, LANCE_EXTENSION);
+        let full_path = self.base_path.child(dir_name.clone());
+        self.object_store.remove_dir_all(full_path).await?;
         Ok(())
     }
 }

View File

@@ -16,6 +16,7 @@ use std::path::Path;
 use std::sync::Arc;
 
 use arrow_array::{Float32Array, RecordBatchReader};
+use arrow_schema::SchemaRef;
 use lance::dataset::{Dataset, ReadParams, WriteParams};
 use lance::index::IndexType;
 use snafu::prelude::*;
@@ -144,6 +145,16 @@ impl Table {
         })
     }
 
+    /// Schema of this Table.
+    pub fn schema(&self) -> SchemaRef {
+        Arc::new(self.dataset.schema().into())
+    }
+
+    /// Version of this Table
+    pub fn version(&self) -> u64 {
+        self.dataset.version().version
+    }
+
     /// Create index on the table.
     pub async fn create_index(&mut self, index_builder: &impl VectorIndexBuilder) -> Result<()> {
         use lance::index::DatasetIndexExt;
@@ -274,6 +285,7 @@ mod tests {
     }
 
     #[test]
+    #[cfg(not(windows))]
     fn test_object_store_path() {
         use std::path::Path as StdPath;
         let p = StdPath::new("s3://bucket/path/to/file");
@@ -350,10 +362,7 @@ mod tests {
             ..Default::default()
         };
-        table
-            .add(new_batches, Some(param))
-            .await
-            .unwrap();
+        table.add(new_batches, Some(param)).await.unwrap();
         assert_eq!(table.count_rows().await.unwrap(), 10);
         assert_eq!(table.name, "test");
     }