reformatted

SaaS python SDK doc
2025-12-23 13:29:57 +00:00 · 2023-12-07 12:08:05 -08:00 · 2023-12-07 12:01:03 -08:00
25 changed files with 151 additions and 954 deletions
--- a/.github/workflows/npm-publish.yml
+++ b/.github/workflows/npm-publish.yml
@@ -37,10 +37,14 @@ jobs:
          path: |
            node/vectordb-*.tgz

-  node-macos-x86:
+  node-macos:
    runs-on: macos-13
    # Only runs on tags that matches the make-release action
    if: startsWith(github.ref, 'refs/tags/v')
+    strategy:
+      fail-fast: false
+      matrix:
+        target: [x86_64-apple-darwin, aarch64-apple-darwin]
    steps:
      - name: Checkout
        uses: actions/checkout@v3
@@ -50,30 +54,11 @@ jobs:
        run: |
          cd node
          npm ci
+      - name: Install rustup target
+        if: ${{ matrix.target == 'aarch64-apple-darwin' }}
+        run: rustup target add aarch64-apple-darwin
      - name: Build MacOS native node modules
-        run: bash ci/build_macos_artifacts.sh x86_64-apple-darwin
-      - name: Upload Darwin Artifacts
-        uses: actions/upload-artifact@v3
-        with:
-          name: native-darwin
-          path: |
-            node/dist/lancedb-vectordb-darwin*.tgz
-
-  node-macos-arm64:
-    runs-on: macos-13-xlarge
-    # Only runs on tags that matches the make-release action
-    if: startsWith(github.ref, 'refs/tags/v')
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v3
-      - name: Install system dependencies
-        run: brew install protobuf
-      - name: Install npm dependencies
-        run: |
-          cd node
-          npm ci
-      - name: Build MacOS native node modules
-        run: bash ci/build_macos_artifacts.sh aarch64-apple-darwin
+        run: bash ci/build_macos_artifacts.sh ${{ matrix.target }}
      - name: Upload Darwin Artifacts
        uses: actions/upload-artifact@v3
        with:
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -5,10 +5,10 @@ exclude = ["python"]
 resolver = "2"

 [workspace.dependencies]
-lance = { "version" = "=0.8.20", "features" = ["dynamodb"] }
-lance-index = { "version" = "=0.8.20" }
-lance-linalg = { "version" = "=0.8.20" }
-lance-testing = { "version" = "=0.8.20" }
+lance = { "version" = "=0.8.17", "features" = ["dynamodb"] }
+lance-index = { "version" = "=0.8.17" }
+lance-linalg = { "version" = "=0.8.17" }
+lance-testing = { "version" = "=0.8.17" }
 # Note that this one does not include pyarrow
 arrow = { version = "47.0.0", optional = false }
 arrow-array = "47.0"
--- a/README.md
+++ b/README.md
@@ -5,11 +5,10 @@

 **Developer-friendly, serverless vector database for AI applications**

-<a href='https://github.com/lancedb/vectordb-recipes/tree/main' target="_blank"><img alt='LanceDB' src='https://img.shields.io/badge/VectorDB_Recipes-100000?style=for-the-badge&logo=LanceDB&logoColor=white&labelColor=645cfb&color=645cfb'/></a>
-<a href='https://lancedb.github.io/lancedb/' target="_blank"><img alt='lancdb' src='https://img.shields.io/badge/DOCS-100000?style=for-the-badge&logo=lancdb&logoColor=white&labelColor=645cfb&color=645cfb'/></a>
-[![Medium](https://img.shields.io/badge/Medium-12100E?style=for-the-badge&logo=medium&logoColor=white)](https://blog.lancedb.com/) 
-[![Discord](https://img.shields.io/badge/Discord-%235865F2.svg?style=for-the-badge&logo=discord&logoColor=white)](https://discord.gg/zMM32dvNtd) 
-[![Twitter](https://img.shields.io/badge/Twitter-%231DA1F2.svg?style=for-the-badge&logo=Twitter&logoColor=white)](https://twitter.com/lancedb)
+<a href="https://lancedb.github.io/lancedb/">Documentation</a> •
+<a href="https://blog.lancedb.com/">Blog</a> •
+<a href="https://discord.gg/zMM32dvNtd">Discord</a> •
+<a href="https://twitter.com/lancedb">Twitter</a>

 </p>

--- a/docs/mkdocs.yml
+++ b/docs/mkdocs.yml
@@ -80,6 +80,7 @@ nav:
    - Ingest Embedding Functions: embeddings/embedding_functions.md
    - Available Functions: embeddings/default_embedding_functions.md
    - Create Custom Embedding Functions: embeddings/api.md
+    - Example - Calculate CLIP Embeddings with Roboflow Inference: examples/image_embeddings_roboflow.md
    - Example - Multi-lingual semantic search: notebooks/multi_lingual_example.ipynb
    - Example - MultiModal CLIP Embeddings: notebooks/DisappearingEmbeddingFunction.ipynb
  - 🔍 Python full-text search: fts.md
@@ -98,7 +99,6 @@ nav:
    - YouTube Transcript Search: notebooks/youtube_transcript_search.ipynb
    - Documentation QA Bot using LangChain: notebooks/code_qa_bot.ipynb
    - Multimodal search using CLIP: notebooks/multimodal_search.ipynb
-    - Example - Calculate CLIP Embeddings with Roboflow Inference: examples/image_embeddings_roboflow.md
    - Serverless QA Bot with S3 and Lambda: examples/serverless_lancedb_with_s3_and_lambda.md
    - Serverless QA Bot with Modal: examples/serverless_qa_bot_with_modal_and_langchain.md
  - 🌐 Javascript examples:
--- a/node/package-lock.json
+++ b/node/package-lock.json
@@ -316,18 +316,6 @@
        "@jridgewell/sourcemap-codec": "^1.4.10"
      }
    },
-    "node_modules/@lancedb/vectordb-darwin-arm64": {
-      "version": "0.3.9",
-      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.3.9.tgz",
-      "integrity": "sha512-irtAdfSRQDcfnMnB8T7D0atLFfu1MMZZ1JaxMKu24DDZ8e4IMYKUplxwvWni3241yA9yDE/pliRZCNQbQCEfrg==",
-      "cpu": [
-        "arm64"
-      ],
-      "optional": true,
-      "os": [
-        "darwin"
-      ]
-    },
    "node_modules/@lancedb/vectordb-darwin-x64": {
      "version": "0.3.9",
      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.3.9.tgz",
@@ -4868,12 +4856,6 @@
        "@jridgewell/sourcemap-codec": "^1.4.10"
      }
    },
-    "@lancedb/vectordb-darwin-arm64": {
-      "version": "0.3.9",
-      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.3.9.tgz",
-      "integrity": "sha512-irtAdfSRQDcfnMnB8T7D0atLFfu1MMZZ1JaxMKu24DDZ8e4IMYKUplxwvWni3241yA9yDE/pliRZCNQbQCEfrg==",
-      "optional": true
-    },
    "@lancedb/vectordb-darwin-x64": {
      "version": "0.3.9",
      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.3.9.tgz",
--- a/node/src/index.ts
+++ b/node/src/index.ts
@@ -21,10 +21,9 @@ import type { EmbeddingFunction } from './embedding/embedding_function'
 import { RemoteConnection } from './remote'
 import { Query } from './query'
 import { isEmbeddingFunction } from './embedding/embedding_function'
-import { type Literal, toSQL } from './util'

 // eslint-disable-next-line @typescript-eslint/no-var-requires
-const { databaseNew, databaseTableNames, databaseOpenTable, databaseDropTable, tableCreate, tableAdd, tableCreateVectorIndex, tableCountRows, tableDelete, tableUpdate, tableCleanupOldVersions, tableCompactFiles, tableListIndices, tableIndexStats } = require('../native.js')
+const { databaseNew, databaseTableNames, databaseOpenTable, databaseDropTable, tableCreate, tableAdd, tableCreateVectorIndex, tableCountRows, tableDelete, tableCleanupOldVersions, tableCompactFiles, tableListIndices, tableIndexStats } = require('../native.js')

 export { Query }
 export type { EmbeddingFunction }
@@ -262,39 +261,6 @@ export interface Table<T = number[]> {
   */
  delete: (filter: string) => Promise<void>

-  /**
-   * Update rows in this table.
-   *
-   * This can be used to update a single row, many rows, all rows, or
-   * sometimes no rows (if your predicate matches nothing).
-   *
-   * @param args see {@link UpdateArgs} and {@link UpdateSqlArgs} for more details
-   *
-   * @examples
-   *
-   * ```ts
-   * const con = await lancedb.connect("./.lancedb")
-   * const data = [
-   *    {id: 1, vector: [3, 3], name: 'Ye'},
-   *    {id: 2, vector: [4, 4], name: 'Mike'},
-   * ];
-   * const tbl = await con.createTable("my_table", data)
-   *
-   * await tbl.update({
-   *   filter: "id = 2",
-   *   updates: { vector: [2, 2], name: "Michael" },
-   * })
-   *
-   * let results = await tbl.search([1, 1]).execute();
-   * // Returns [
-   * //   {id: 2, vector: [2, 2], name: 'Michael'}
-   * //   {id: 1, vector: [3, 3], name: 'Ye'}
-   * // ]
-   * ```
-   *
-   */
-  update: (args: UpdateArgs | UpdateSqlArgs) => Promise<void>
-
  /**
   * List the indicies on this table.
   */
@@ -306,34 +272,6 @@ export interface Table<T = number[]> {
  indexStats: (indexUuid: string) => Promise<IndexStats>
 }

-export interface UpdateArgs {
-  /**
-   * A filter in the same format used by a sql WHERE clause. The filter may be empty,
-   * in which case all rows will be updated.
-   */
-  where?: string
-
-  /**
-   * A key-value map of updates. The keys are the column names, and the values are the
-   * new values to set
-   */
-  values: Record<string, Literal>
-}
-
-export interface UpdateSqlArgs {
-  /**
-   * A filter in the same format used by a sql WHERE clause. The filter may be empty,
-   * in which case all rows will be updated.
-   */
-  where?: string
-
-  /**
-   * A key-value map of updates. The keys are the column names, and the values are the
-   * new values to set as SQL expressions.
-   */
-  valuesSql: Record<string, string>
-}
-
 export interface VectorIndex {
  columns: string[]
  name: string
@@ -488,16 +426,6 @@ export class LocalTable<T = number[]> implements Table<T> {
    return new Query(query, this._tbl, this._embeddings)
  }

-  /**
-   * Creates a filter query to find all rows matching the specified criteria
-   * @param value The filter criteria (like SQL where clause syntax)
-   */
-  filter (value: string): Query<T> {
-    return new Query(undefined, this._tbl, this._embeddings).filter(value)
-  }
-
-  where = this.filter
-
  /**
   * Insert records into this Table.
   *
@@ -553,31 +481,6 @@ export class LocalTable<T = number[]> implements Table<T> {
    return tableDelete.call(this._tbl, filter).then((newTable: any) => { this._tbl = newTable })
  }

-  /**
-   * Update rows in this table.
-   *
-   * @param args see {@link UpdateArgs} and {@link UpdateSqlArgs} for more details
-   *
-   * @returns
-   */
-  async update (args: UpdateArgs | UpdateSqlArgs): Promise<void> {
-    let filter: string | null
-    let updates: Record<string, string>
-
-    if ('valuesSql' in args) {
-      filter = args.where ?? null
-      updates = args.valuesSql
-    } else {
-      filter = args.where ?? null
-      updates = {}
-      for (const [key, value] of Object.entries(args.values)) {
-        updates[key] = toSQL(value)
-      }
-    }
-
-    return tableUpdate.call(this._tbl, filter, updates).then((newTable: any) => { this._tbl = newTable })
-  }
-
  /**
   * Clean up old versions of the table, freeing disk space.
   *
--- a/node/src/query.ts
+++ b/node/src/query.ts
@@ -23,10 +23,10 @@ const { tableSearch } = require('../native.js')
 * A builder for nearest neighbor queries for LanceDB.
 */
 export class Query<T = number[]> {
-  private readonly _query?: T
+  private readonly _query: T
  private readonly _tbl?: any
  private _queryVector?: number[]
-  private _limit?: number
+  private _limit: number
  private _refineFactor?: number
  private _nprobes: number
  private _select?: string[]
@@ -35,10 +35,10 @@ export class Query<T = number[]> {
  private _prefilter: boolean
  protected readonly _embeddings?: EmbeddingFunction<T>

-  constructor (query?: T, tbl?: any, embeddings?: EmbeddingFunction<T>) {
+  constructor (query: T, tbl?: any, embeddings?: EmbeddingFunction<T>) {
    this._tbl = tbl
    this._query = query
-    this._limit = undefined
+    this._limit = 10
    this._nprobes = 20
    this._refineFactor = undefined
    this._select = undefined
@@ -113,13 +113,11 @@ export class Query<T = number[]> {
     * Execute the query and return the results as an Array of Objects
     */
  async execute<T = Record<string, unknown>> (): Promise<T[]> {
-    if (this._query !== undefined) {
    if (this._embeddings !== undefined) {
      this._queryVector = (await this._embeddings.embed([this._query]))[0]
    } else {
      this._queryVector = this._query as number[]
    }
-    }

    const isElectron = this.isElectron()
    const buffer = await tableSearch.call(this._tbl, this, isElectron)
--- a/node/src/remote/index.ts
+++ b/node/src/remote/index.ts
@@ -16,8 +16,7 @@ import {
  type EmbeddingFunction, type Table, type VectorIndexParams, type Connection,
  type ConnectionOptions, type CreateTableOptions, type VectorIndex,
  type WriteOptions,
-  type IndexStats,
-  type UpdateArgs, type UpdateSqlArgs
+  type IndexStats
 } from '../index'
 import { Query } from '../query'

@@ -247,10 +246,6 @@ export class RemoteTable<T = number[]> implements Table<T> {
    await this._client.post(`/v1/table/${this._name}/delete/`, { predicate: filter })
  }

-  async update (args: UpdateArgs | UpdateSqlArgs): Promise<void> {
-    throw new Error('Not implemented')
-  }
-
  async listIndices (): Promise<VectorIndex[]> {
    const results = await this._client.post(`/v1/table/${this._name}/index/list/`)
    return results.data.indexes?.map((index: any) => ({
--- a/node/src/test/test.ts
+++ b/node/src/test/test.ts
@@ -78,31 +78,12 @@ describe('LanceDB client', function () {
    })

    it('limits # of results', async function () {
-      const uri = await createTestDB(2, 100)
+      const uri = await createTestDB()
      const con = await lancedb.connect(uri)
      const table = await con.openTable('vectors')
-      let results = await table.search([0.1, 0.3]).limit(1).execute()
+      const results = await table.search([0.1, 0.3]).limit(1).execute()
      assert.equal(results.length, 1)
      assert.equal(results[0].id, 1)
-
-      // there is a default limit if unspecified
-      results = await table.search([0.1, 0.3]).execute()
-      assert.equal(results.length, 10)
-    })
-
-    it('uses a filter / where clause without vector search', async function () {
-      // eslint-disable-next-line @typescript-eslint/explicit-function-return-type
-      const assertResults = (results: Array<Record<string, unknown>>) => {
-        assert.equal(results.length, 50)
-      }
-
-      const uri = await createTestDB(2, 100)
-      const con = await lancedb.connect(uri)
-      const table = (await con.openTable('vectors')) as LocalTable
-      let results = await table.filter('id % 2 = 0').execute()
-      assertResults(results)
-      results = await table.where('id % 2 = 0').execute()
-      assertResults(results)
    })

    it('uses a filter / where clause', async function () {
@@ -279,46 +260,6 @@ describe('LanceDB client', function () {
      assert.equal(await table.countRows(), 2)
    })

-    it('can update records in the table', async function () {
-      const uri = await createTestDB()
-      const con = await lancedb.connect(uri)
-
-      const table = await con.openTable('vectors')
-      assert.equal(await table.countRows(), 2)
-
-      await table.update({ where: 'price = 10', valuesSql: { price: '100' } })
-      const results = await table.search([0.1, 0.2]).execute()
-      assert.equal(results[0].price, 100)
-      assert.equal(results[1].price, 11)
-    })
-
-    it('can update the records using a literal value', async function () {
-      const uri = await createTestDB()
-      const con = await lancedb.connect(uri)
-
-      const table = await con.openTable('vectors')
-      assert.equal(await table.countRows(), 2)
-
-      await table.update({ where: 'price = 10', values: { price: 100 } })
-      const results = await table.search([0.1, 0.2]).execute()
-      assert.equal(results[0].price, 100)
-      assert.equal(results[1].price, 11)
-    })
-
-    it('can update every record in the table', async function () {
-      const uri = await createTestDB()
-      const con = await lancedb.connect(uri)
-
-      const table = await con.openTable('vectors')
-      assert.equal(await table.countRows(), 2)
-
-      await table.update({ valuesSql: { price: '100' } })
-      const results = await table.search([0.1, 0.2]).execute()
-
-      assert.equal(results[0].price, 100)
-      assert.equal(results[1].price, 100)
-    })
-
    it('can delete records from a table', async function () {
      const uri = await createTestDB()
      const con = await lancedb.connect(uri)
@@ -601,7 +542,7 @@ describe('Compact and cleanup', function () {

    // should have no effect, but this validates the arguments are parsed.
    await table.compactFiles({
-      targetRowsPerFragment: 102410,
+      targetRowsPerFragment: 1024 * 10,
      maxRowsPerGroup: 1024,
      materializeDeletions: true,
      materializeDeletionsThreshold: 0.5,
--- a/node/src/test/util.ts
+++ b/node/src/test/util.ts
@@ -1,45 +0,0 @@
-// Copyright 2023 LanceDB Developers.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-import { toSQL } from '../util'
-import * as chai from 'chai'
-
-const expect = chai.expect
-
-describe('toSQL', function () {
-  it('should turn string to SQL expression', function () {
-    expect(toSQL('foo')).to.equal("'foo'")
-  })
-
-  it('should turn number to SQL expression', function () {
-    expect(toSQL(123)).to.equal('123')
-  })
-
-  it('should turn boolean to SQL expression', function () {
-    expect(toSQL(true)).to.equal('TRUE')
-  })
-
-  it('should turn null to SQL expression', function () {
-    expect(toSQL(null)).to.equal('NULL')
-  })
-
-  it('should turn Date to SQL expression', function () {
-    const date = new Date('05 October 2011 14:48 UTC')
-    expect(toSQL(date)).to.equal("'2011-10-05T14:48:00.000Z'")
-  })
-
-  it('should turn array to SQL expression', function () {
-    expect(toSQL(['foo', 'bar', true, 1])).to.equal("['foo', 'bar', TRUE, 1]")
-  })
-})
--- a/node/src/util.ts
+++ b/node/src/util.ts
@@ -1,44 +0,0 @@
-// Copyright 2023 LanceDB Developers.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-export type Literal = string | number | boolean | null | Date | Literal[]
-
-export function toSQL (value: Literal): string {
-  if (typeof value === 'string') {
-    return `'${value}'`
-  }
-
-  if (typeof value === 'number') {
-    return value.toString()
-  }
-
-  if (typeof value === 'boolean') {
-    return value ? 'TRUE' : 'FALSE'
-  }
-
-  if (value === null) {
-    return 'NULL'
-  }
-
-  if (value instanceof Date) {
-    return `'${value.toISOString()}'`
-  }
-
-  if (Array.isArray(value)) {
-    return `[${value.map(toSQL).join(', ')}]`
-  }
-
-  // eslint-disable-next-line @typescript-eslint/restrict-template-expressions
-  throw new Error(`Unsupported value type: ${typeof value} value: (${value})`)
-}
--- a/python/.bumpversion.cfg
+++ b/python/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 0.3.5
+current_version = 0.3.4
 commit = True
 message = [python] Bump version: {current_version} → {new_version}
 tag = True
--- a/python/lancedb/remote/db.py
+++ b/python/lancedb/remote/db.py
@@ -56,7 +56,7 @@ class RemoteDBConnection(DBConnection):
            self._loop = asyncio.get_event_loop()

    def __repr__(self) -> str:
-        return f"RemoteConnect(name={self.db_name})"
+        return f"RemoveConnect(name={self.db_name})"

    @override
    def table_names(
@@ -167,10 +167,10 @@ class RemoteDBConnection(DBConnection):
        Can create with list of tuples or dictionaries:

        >>> import lancedb
-        >>> db = lancedb.connect("db://...", api_key="...", region="...") # doctest: +SKIP
+        >>> db = lancedb.connect("db://test-project-8f45eb")
        >>> data = [{"vector": [1.1, 1.2], "lat": 45.5, "long": -122.7},
        ...         {"vector": [0.2, 1.8], "lat": 40.1, "long":  -74.1}]
-        >>> db.create_table("my_table", data) # doctest: +SKIP
+        >>> db.create_table("my_table", data)
        LanceTable(my_table)

        You can also pass a pandas DataFrame:
@@ -181,7 +181,7 @@ class RemoteDBConnection(DBConnection):
        ...    "lat": [45.5, 40.1],
        ...    "long": [-122.7, -74.1]
        ... })
-        >>> db.create_table("table2", data) # doctest: +SKIP
+        >>> db.create_table("table2", data)
        LanceTable(table2)

        >>> custom_schema = pa.schema([
@@ -189,7 +189,7 @@ class RemoteDBConnection(DBConnection):
        ...   pa.field("lat", pa.float32()),
        ...   pa.field("long", pa.float32())
        ... ])
-        >>> db.create_table("table3", data, schema = custom_schema) # doctest: +SKIP
+        >>> db.create_table("table3", data, schema = custom_schema)
        LanceTable(table3)

        It is also possible to create an table from `[Iterable[pa.RecordBatch]]`:
@@ -211,7 +211,7 @@ class RemoteDBConnection(DBConnection):
        ...     pa.field("item", pa.utf8()),
        ...     pa.field("price", pa.float32()),
        ... ])
-        >>> db.create_table("table4", make_batches(), schema=schema) # doctest: +SKIP
+        >>> db.create_table("table4", make_batches(), schema=schema)
        LanceTable(table4)

        """
--- a/python/lancedb/remote/table.py
+++ b/python/lancedb/remote/table.py
@@ -85,7 +85,7 @@ class RemoteTable(Table):
        >>> import lancedb
        >>> import uuid
        >>> from lancedb.schema import vector
-        >>> db = lancedb.connect("db://...", api_key="...", region="...") # doctest: +SKIP
+        >>> conn = lancedb.connect("db://...", api_key="...", region="...")
        >>> table_name = uuid.uuid4().hex
        >>> schema = pa.schema(
        ...     [
@@ -94,11 +94,11 @@ class RemoteTable(Table):
        ...             pa.field("s", pa.string(), False),
        ...     ]
        ... )
-        >>> table = db.create_table( # doctest: +SKIP
-        ...     table_name, # doctest: +SKIP
-        ...     schema=schema, # doctest: +SKIP
-        ... )
-        >>> table.create_index("L2", "vector") # doctest: +SKIP
+        >>> table = conn.create_table(
+        ...     table_name,
+        ...     schema=schema,
+        ... )
+        >>> table.create_index("L2", "vector")
        """
        index_type = "vector"

@@ -173,22 +173,22 @@ class RemoteTable(Table):
        Examples
        --------
        >>> import lancedb
-        >>> db = lancedb.connect("db://...", api_key="...", region="...") # doctest: +SKIP
+        >>> db = lancedb.connect("db://...", api_key="...", region="...")
        >>> data = [
        ...    {"original_width": 100, "caption": "bar", "vector": [0.1, 2.3, 4.5]},
        ...    {"original_width": 2000, "caption": "foo",  "vector": [0.5, 3.4, 1.3]},
        ...    {"original_width": 3000, "caption": "test", "vector": [0.3, 6.2, 2.6]}
        ... ]
-        >>> table = db.create_table("my_table", data) # doctest: +SKIP
+        >>> table = db.create_table("my_table", data)
        >>> query = [0.4, 1.4, 2.4]
-        >>> (table.search(query, vector_column_name="vector") # doctest: +SKIP
-        ...     .where("original_width > 1000", prefilter=True) # doctest: +SKIP
-        ...     .select(["caption", "original_width"]) # doctest: +SKIP
-        ...     .limit(2) # doctest: +SKIP
-        ...     .to_pandas()) # doctest: +SKIP
-          caption  original_width           vector  _distance # doctest: +SKIP
-        0     foo            2000  [0.5, 3.4, 1.3]   5.220000 # doctest: +SKIP
-        1    test            3000  [0.3, 6.2, 2.6]  23.089996 # doctest: +SKIP
+        >>> (table.search(query, vector_column_name="vector")
+        ...     .where("original_width > 1000", prefilter=True)
+        ...     .select(["caption", "original_width"])
+        ...     .limit(2)
+        ...     .to_pandas())
+          caption  original_width           vector  _distance
+        0     foo            2000  [0.5, 3.4, 1.3]   5.220000
+        1    test            3000  [0.3, 6.2, 2.6]  23.089996

        Parameters
        ----------
@@ -246,28 +246,30 @@ class RemoteTable(Table):
        ...    {"x": 2, "vector": [3, 4]},
        ...    {"x": 3, "vector": [5, 6]}
        ... ]
-        >>> db = lancedb.connect("db://...", api_key="...", region="...") # doctest: +SKIP
-        >>> table = db.create_table("my_table", data) # doctest: +SKIP
-        >>> table.search([10,10]).to_pandas() # doctest: +SKIP
-           x      vector  _distance # doctest: +SKIP
-        0  3  [5.0, 6.0]       41.0 # doctest: +SKIP
-        1  2  [3.0, 4.0]       85.0 # doctest: +SKIP
-        2  1  [1.0, 2.0]      145.0 # doctest: +SKIP
-        >>> table.delete("x = 2") # doctest: +SKIP
-        >>> table.search([10,10]).to_pandas() # doctest: +SKIP
-           x      vector  _distance # doctest: +SKIP
-        0  3  [5.0, 6.0]       41.0 # doctest: +SKIP
-        1  1  [1.0, 2.0]      145.0 # doctest: +SKIP
+        >>> db = lancedb.connect("db://...", api_key="...", region="...")
+        >>> table = db.create_table("my_table", data)
+        >>> table.search([10,10]).to_pandas()
+           x      vector  _distance
+        0  3  [5.0, 6.0]       41.0
+        1  2  [3.0, 4.0]       85.0
+        2  1  [1.0, 2.0]      145.0
+        >>> table.delete("x = 2")
+        >>> table.search([10,10]).to_pandas()
+           x      vector  _distance
+        0  3  [5.0, 6.0]       41.0
+        1  1  [1.0, 2.0]      145.0

        If you have a list of values to delete, you can combine them into a
        stringified list and use the `IN` operator:

-        >>> to_remove = [1, 3] # doctest: +SKIP
-        >>> to_remove = ", ".join([str(v) for v in to_remove]) # doctest: +SKIP
-        >>> table.delete(f"x IN ({to_remove})") # doctest: +SKIP
-        >>> table.search([10,10]).to_pandas() # doctest: +SKIP
-           x      vector  _distance # doctest: +SKIP
-        0  2  [3.0, 4.0]       85.0 # doctest: +SKIP
+        >>> to_remove = [1, 3]
+        >>> to_remove = ", ".join([str(v) for v in to_remove])
+        >>> to_remove
+        '1, 3'
+        >>> table.delete(f"x IN ({to_remove})")
+        >>> table.search([10,10]).to_pandas()
+           x      vector  _distance
+        0  2  [3.0, 4.0]       85.0
        """
        payload = {"predicate": predicate}
        self._conn._loop.run_until_complete(
--- a/python/lancedb/table.py
+++ b/python/lancedb/table.py
@@ -17,7 +17,7 @@ import inspect
 import os
 from abc import ABC, abstractmethod
 from functools import cached_property
-from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Union
+from typing import TYPE_CHECKING, Any, Iterable, List, Optional, Union

 import lance
 import numpy as np
@@ -30,7 +30,7 @@ from .common import DATA, VEC, VECTOR_COLUMN_NAME
 from .embeddings import EmbeddingFunctionConfig, EmbeddingFunctionRegistry
 from .pydantic import LanceModel
 from .query import LanceQueryBuilder, Query
-from .util import fs_from_uri, safe_import_pandas, value_to_sql
+from .util import fs_from_uri, safe_import_pandas
 from .utils.events import register_event

 if TYPE_CHECKING:
@@ -785,7 +785,7 @@ class LanceTable(Table):
            and also the "_distance" column which is the distance between the query
            vector and the returned vector.
        """
-        register_event("search_table")
+        register_event("search")
        return LanceQueryBuilder.create(
            self, query, query_type, vector_column_name=vector_column_name
        )
@@ -906,42 +906,35 @@ class LanceTable(Table):
                f"Table {name} does not exist."
                f"Please first call db.create_table({name}, data)"
            )
-        register_event("open_table")
-
        return tbl

    def delete(self, where: str):
        self._dataset.delete(where)

-    def update(
-        self,
-        where: Optional[str] = None,
-        values: Optional[dict] = None,
-        *,
-        values_sql: Optional[Dict[str, str]] = None,
-    ):
+    def update(self, where: str, values: dict):
        """
+        EXPERIMENTAL: Update rows in the table (not threadsafe).
+
        This can be used to update zero to all rows depending on how many
        rows match the where clause.

        Parameters
        ----------
-        where: str, optional
+        where: str
            The SQL where clause to use when updating rows. For example, 'x = 2'
            or 'x IN (1, 2, 3)'. The filter must not be empty, or it will error.
-        values: dict, optional
+        values: dict
            The values to update. The keys are the column names and the values
            are the values to set.
-        values_sql: dict, optional
-            The values to update, expressed as SQL expression strings. These can
-            reference existing columns. For example, {"x": "x + 1"} will increment
-            the x column by 1.

        Examples
        --------
        >>> import lancedb
-        >>> import pandas as pd
-        >>> data = pd.DataFrame({"x": [1, 2, 3], "vector": [[1, 2], [3, 4], [5, 6]]})
+        >>> data = [
+        ...    {"x": 1, "vector": [1, 2]},
+        ...    {"x": 2, "vector": [3, 4]},
+        ...    {"x": 3, "vector": [5, 6]}
+        ... ]
        >>> db = lancedb.connect("./.lancedb")
        >>> table = db.create_table("my_table", data)
        >>> table.to_pandas()
@@ -957,15 +950,18 @@ class LanceTable(Table):
        2  2  [10.0, 10.0]

        """
-        if values is not None and values_sql is not None:
-            raise ValueError("Only one of values or values_sql can be provided")
-        if values is None and values_sql is None:
-            raise ValueError("Either values or values_sql must be provided")
-
-        if values is not None:
-            values_sql = {k: value_to_sql(v) for k, v in values.items()}
-
-        self.to_lance().update(values_sql, where)
+        orig_data = self._dataset.to_table(filter=where).combine_chunks()
+        if len(orig_data) == 0:
+            return
+        for col, val in values.items():
+            i = orig_data.column_names.index(col)
+            if i < 0:
+                raise ValueError(f"Column {col} does not exist")
+            orig_data = orig_data.set_column(
+                i, col, pa.array([val] * len(orig_data), type=orig_data[col].type)
+            )
+        self.delete(where)
+        self.add(orig_data, mode="append")
        self._reset_dataset()
        register_event("update")

--- a/python/lancedb/util.py
+++ b/python/lancedb/util.py
@@ -12,12 +12,9 @@
 #  limitations under the License.

 import os
-from datetime import date, datetime
-from functools import singledispatch
 from typing import Tuple
 from urllib.parse import urlparse

-import numpy as np
 import pyarrow.fs as pa_fs


@@ -91,53 +88,3 @@ def safe_import_pandas():
        return pd
    except ImportError:
        return None
-
-
-@singledispatch
-def value_to_sql(value):
-    raise NotImplementedError("SQL conversion is not implemented for this type")
-
-
-@value_to_sql.register(str)
-def _(value: str):
-    return f"'{value}'"
-
-
-@value_to_sql.register(int)
-def _(value: int):
-    return str(value)
-
-
-@value_to_sql.register(float)
-def _(value: float):
-    return str(value)
-
-
-@value_to_sql.register(bool)
-def _(value: bool):
-    return str(value).upper()
-
-
-@value_to_sql.register(type(None))
-def _(value: type(None)):
-    return "NULL"
-
-
-@value_to_sql.register(datetime)
-def _(value: datetime):
-    return f"'{value.isoformat()}'"
-
-
-@value_to_sql.register(date)
-def _(value: date):
-    return f"'{value.isoformat()}'"
-
-
-@value_to_sql.register(list)
-def _(value: list):
-    return "[" + ", ".join(map(value_to_sql, value)) + "]"
-
-
-@value_to_sql.register(np.ndarray)
-def _(value: np.ndarray):
-    return value_to_sql(value.tolist())
--- a/python/lancedb/utils/events.py
+++ b/python/lancedb/utils/events.py
@@ -64,10 +64,8 @@ class _Events:
        Initializes the Events object with default values for events, rate_limit, and metadata.
        """
        self.events = []  # events list
-        self.throttled_event_names = ["search_table"]
-        self.throttled_events = set()
-        self.max_events = 5  # max events to store in memory
-        self.rate_limit = 60.0 * 5  # rate limit (seconds)
+        self.max_events = 25  # max events to store in memory
+        self.rate_limit = 60.0  # rate limit (seconds)
        self.time = 0.0

        if is_git_dir():
@@ -114,9 +112,10 @@ class _Events:
            return
        if (
            len(self.events) < self.max_events
-        ):  # Events list limited to self.max_events (drop any events past this)
+        ):  # Events list limited to 25 events (drop any events past this)
            params.update(self.metadata)
-            event = {
+            self.events.append(
+                {
                    "event": event_name,
                    "properties": params,
                    "timestamp": datetime.datetime.now(
@@ -124,11 +123,7 @@ class _Events:
                    ).isoformat(),
                    "distinct_id": CONFIG["uuid"],
                }
-            if event_name not in self.throttled_event_names:
-                self.events.append(event)
-            elif event_name not in self.throttled_events:
-                self.throttled_events.add(event_name)
-                self.events.append(event)
+            )

        # Check rate limit
        t = time.time()
@@ -140,6 +135,7 @@ class _Events:
            "distinct_id": CONFIG["uuid"],  # posthog needs this to accepts the event
            "batch": self.events,
        }
+
        # POST equivalent to requests.post(self.url, json=data).
        # threaded request is used to avoid blocking, retries are disabled, and verbose is disabled
        # to avoid any possible disruption in the console.
@@ -154,7 +150,6 @@ class _Events:

        # Flush & Reset
        self.events = []
-        self.throttled_events = set()
        self.time = t


--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -1,12 +1,12 @@
 [project]
 name = "lancedb"
-version = "0.3.5"
+version = "0.3.4"
 dependencies = [
    "deprecation",
-    "pylance==0.8.21",
+    "pylance==0.8.17",
    "ratelimiter~=1.0",
    "retry>=0.9.2",
-    "tqdm>=4.27.0",
+    "tqdm>=4.1.0",
    "aiohttp",
    "pydantic>=1.10",
    "attrs>=21.3.0",
--- a/python/tests/test_table.py
+++ b/python/tests/test_table.py
@@ -12,7 +12,7 @@
 #  limitations under the License.

 import functools
-from datetime import date, datetime, timedelta
+from datetime import timedelta
 from pathlib import Path
 from typing import List
 from unittest.mock import PropertyMock, patch
@@ -348,79 +348,14 @@ def test_update(db):
    assert len(table) == 2
    assert len(table.list_versions()) == 2
    table.update(where="id=0", values={"vector": [1.1, 1.1]})
-    assert len(table.list_versions()) == 3
-    assert table.version == 3
+    assert len(table.list_versions()) == 4
+    assert table.version == 4
    assert len(table) == 2
    v = table.to_arrow()["vector"].combine_chunks()
    v = v.values.to_numpy().reshape(2, 2)
    assert np.allclose(v, np.array([[1.2, 1.9], [1.1, 1.1]]))


-def test_update_types(db):
-    table = LanceTable.create(
-        db,
-        "my_table",
-        data=[
-            {
-                "id": 0,
-                "str": "foo",
-                "float": 1.1,
-                "timestamp": datetime(2021, 1, 1),
-                "date": date(2021, 1, 1),
-                "vector1": [1.0, 0.0],
-                "vector2": [1.0, 1.0],
-            }
-        ],
-    )
-    # Update with SQL
-    table.update(
-        values_sql=dict(
-            id="1",
-            str="'bar'",
-            float="2.2",
-            timestamp="TIMESTAMP '2021-01-02 00:00:00'",
-            date="DATE '2021-01-02'",
-            vector1="[2.0, 2.0]",
-            vector2="[3.0, 3.0]",
-        )
-    )
-    actual = table.to_arrow().to_pylist()[0]
-    expected = dict(
-        id=1,
-        str="bar",
-        float=2.2,
-        timestamp=datetime(2021, 1, 2),
-        date=date(2021, 1, 2),
-        vector1=[2.0, 2.0],
-        vector2=[3.0, 3.0],
-    )
-    assert actual == expected
-
-    # Update with values
-    table.update(
-        values=dict(
-            id=2,
-            str="baz",
-            float=3.3,
-            timestamp=datetime(2021, 1, 3),
-            date=date(2021, 1, 3),
-            vector1=[3.0, 3.0],
-            vector2=np.array([4.0, 4.0]),
-        )
-    )
-    actual = table.to_arrow().to_pylist()[0]
-    expected = dict(
-        id=2,
-        str="baz",
-        float=3.3,
-        timestamp=datetime(2021, 1, 3),
-        date=date(2021, 1, 3),
-        vector1=[3.0, 3.0],
-        vector2=[4.0, 4.0],
-    )
-    assert actual == expected
-
-
 def test_create_with_embedding_function(db):
    class MyTable(LanceModel):
        text: str
--- a/rust/ffi/node/src/lib.rs
+++ b/rust/ffi/node/src/lib.rs
@@ -237,7 +237,6 @@ fn main(mut cx: ModuleContext) -> NeonResult<()> {
    cx.export_function("tableAdd", JsTable::js_add)?;
    cx.export_function("tableCountRows", JsTable::js_count_rows)?;
    cx.export_function("tableDelete", JsTable::js_delete)?;
-    cx.export_function("tableUpdate", JsTable::js_update)?;
    cx.export_function("tableCleanupOldVersions", JsTable::js_cleanup)?;
    cx.export_function("tableCompactFiles", JsTable::js_compact)?;
    cx.export_function("tableListIndices", JsTable::js_list_indices)?;
--- a/rust/ffi/node/src/query.rs
+++ b/rust/ffi/node/src/query.rs
@@ -23,14 +23,8 @@ impl JsQuery {
        let query_obj = cx.argument::<JsObject>(0)?;

        let limit = query_obj
-            .get_opt::<JsNumber, _, _>(&mut cx, "_limit")?
-            .map(|value| {
-                let limit = value.value(&mut cx) as u64;
-                if limit <= 0 {
-                    panic!("Limit must be a positive integer");
-                }
-                limit
-            });
+            .get::<JsNumber, _, _>(&mut cx, "_limit")?
+            .value(&mut cx);
        let select = query_obj
            .get_opt::<JsArray, _, _>(&mut cx, "_select")?
            .map(|arr| {
@@ -54,9 +48,7 @@ impl JsQuery {
            .map(|s| s.value(&mut cx))
            .map(|s| MetricType::try_from(s.as_str()).unwrap());

-        let prefilter = query_obj
-            .get::<JsBoolean, _, _>(&mut cx, "_prefilter")?
-            .value(&mut cx);
+        let prefilter = query_obj.get::<JsBoolean, _, _>(&mut cx, "_prefilter")?.value(&mut cx);

        let is_electron = cx
            .argument::<JsBoolean>(1)
@@ -67,23 +59,20 @@ impl JsQuery {

        let (deferred, promise) = cx.promise();
        let channel = cx.channel();
-        let query_vector = query_obj.get_opt::<JsArray, _, _>(&mut cx, "_queryVector")?;
+        let query_vector = query_obj.get::<JsArray, _, _>(&mut cx, "_queryVector")?;
+        let query = convert::js_array_to_vec(query_vector.deref(), &mut cx);
        let table = js_table.table.clone();
-        let query = query_vector.map(|q| convert::js_array_to_vec(q.deref(), &mut cx));

        rt.spawn(async move {
-            let mut builder = table
-                .search(query.map(|q| Float32Array::from(q)))
+            let builder = table
+                .search(Float32Array::from(query))
+                .limit(limit as usize)
                .refine_factor(refine_factor)
                .nprobes(nprobes)
                .filter(filter)
                .metric_type(metric_type)
                .select(select)
                .prefilter(prefilter);
-            if let Some(limit) = limit {
-                builder = builder.limit(limit as usize);
-            };
-
            let record_batch_stream = builder.execute();
            let results = record_batch_stream
                .and_then(|stream| {
--- a/rust/ffi/node/src/table.rs
+++ b/rust/ffi/node/src/table.rs
@@ -165,69 +165,6 @@ impl JsTable {
        Ok(promise)
    }

-    pub(crate) fn js_update(mut cx: FunctionContext) -> JsResult<JsPromise> {
-        let js_table = cx.this().downcast_or_throw::<JsBox<JsTable>, _>(&mut cx)?;
-        let mut table = js_table.table.clone();
-
-        let rt = runtime(&mut cx)?;
-        let (deferred, promise) = cx.promise();
-        let channel = cx.channel();
-
-        // create a vector of updates from the passed map
-        let updates_arg = cx.argument::<JsObject>(1)?;
-        let properties = updates_arg.get_own_property_names(&mut cx)?;
-        let mut updates: Vec<(String, String)> =
-            Vec::with_capacity(properties.len(&mut cx) as usize);
-
-        let len_properties = properties.len(&mut cx);
-        for i in 0..len_properties {
-            let property = properties
-                .get_value(&mut cx, i)?
-                .downcast_or_throw::<JsString, _>(&mut cx)?;
-
-            let value = updates_arg
-                .get_value(&mut cx, property.clone())?
-                .downcast_or_throw::<JsString, _>(&mut cx)?;
-
-            let property = property.value(&mut cx);
-            let value = value.value(&mut cx);
-            updates.push((property, value));
-        }
-
-        // get the filter/predicate if the user passed one
-        let predicate = cx.argument_opt(0);
-        let predicate = predicate.unwrap().downcast::<JsString, _>(&mut cx);
-        let predicate = match predicate {
-            Ok(_) => {
-                let val = predicate.map(|s| s.value(&mut cx)).unwrap();
-                Some(val)
-            }
-            Err(_) => {
-                // if the predicate is not string, check it's null otherwise an invalid
-                // type was passed
-                cx.argument::<JsNull>(0)?;
-                None
-            }
-        };
-
-        rt.spawn(async move {
-            let updates_arg = updates
-                .iter()
-                .map(|(k, v)| (k.as_str(), v.as_str()))
-                .collect::<Vec<_>>();
-
-            let predicate = predicate.as_ref().map(|s| s.as_str());
-
-            let update_result = table.update(predicate, updates_arg).await;
-            deferred.settle_with(&channel, move |mut cx| {
-                update_result.or_throw(&mut cx)?;
-                Ok(cx.boxed(JsTable::from(table)))
-            })
-        });
-
-        Ok(promise)
-    }
-
    pub(crate) fn js_cleanup(mut cx: FunctionContext) -> JsResult<JsPromise> {
        let js_table = cx.this().downcast_or_throw::<JsBox<JsTable>, _>(&mut cx)?;
        let rt = runtime(&mut cx)?;
--- a/rust/vectordb/src/io/object_store.rs
+++ b/rust/vectordb/src/io/object_store.rs
@@ -359,7 +359,7 @@ mod test {
        assert_eq!(t.count_rows().await.unwrap(), 100);

        let q = t
-            .search(Some(PrimitiveArray::from_iter_values(vec![0.1, 0.1, 0.1, 0.1])))
+            .search(PrimitiveArray::from_iter_values(vec![0.1, 0.1, 0.1, 0.1]))
            .limit(10)
            .execute()
            .await
--- a/rust/vectordb/src/query.rs
+++ b/rust/vectordb/src/query.rs
@@ -24,8 +24,8 @@ use crate::error::Result;
 /// A builder for nearest neighbor queries for LanceDB.
 pub struct Query {
    pub dataset: Arc<Dataset>,
-    pub query_vector: Option<Float32Array>,
-    pub limit: Option<usize>,
+    pub query_vector: Float32Array,
+    pub limit: usize,
    pub filter: Option<String>,
    pub select: Option<Vec<String>>,
    pub nprobes: usize,
@@ -46,11 +46,11 @@ impl Query {
    /// # Returns
    ///
    /// * A [Query] object.
-    pub(crate) fn new(dataset: Arc<Dataset>, vector: Option<Float32Array>) -> Self {
+    pub(crate) fn new(dataset: Arc<Dataset>, vector: Float32Array) -> Self {
        Query {
            dataset,
            query_vector: vector,
-            limit: None,
+            limit: 10,
            nprobes: 20,
            refine_factor: None,
            metric_type: None,
@@ -69,13 +69,11 @@ impl Query {
    pub async fn execute(&self) -> Result<DatasetRecordBatchStream> {
        let mut scanner: Scanner = self.dataset.scan();

-        if let Some(query) = self.query_vector.as_ref() {
-            // If there is a vector query, default to limit=10 if unspecified
-            scanner.nearest(crate::table::VECTOR_COLUMN_NAME, query, self.limit.unwrap_or(10))?;
-        } else {
-            // If there is no vector query, it's ok to not have a limit
-            scanner.limit(self.limit.map(|limit| limit as i64), None)?;
-        }
+        scanner.nearest(
+            crate::table::VECTOR_COLUMN_NAME,
+            &self.query_vector,
+            self.limit,
+        )?;
        scanner.nprobs(self.nprobes);
        scanner.use_index(self.use_index);
        scanner.prefilter(self.prefilter);
@@ -93,7 +91,7 @@ impl Query {
    ///
    /// * `limit` - The maximum number of results to return.
    pub fn limit(mut self, limit: usize) -> Query {
-        self.limit = Some(limit);
+        self.limit = limit;
        self
    }

@@ -103,7 +101,7 @@ impl Query {
    ///
    /// * `vector` - The vector that will be used for search.
    pub fn query_vector(mut self, query_vector: Float32Array) -> Query {
-        self.query_vector = Some(query_vector);
+        self.query_vector = query_vector;
        self
    }

@@ -176,7 +174,7 @@ mod tests {
    use std::sync::Arc;

    use super::*;
-    use arrow_array::{Float32Array, RecordBatch, RecordBatchIterator, RecordBatchReader, cast::AsArray, Int32Array};
+    use arrow_array::{Float32Array, RecordBatch, RecordBatchIterator, RecordBatchReader};
    use arrow_schema::{DataType, Field as ArrowField, Schema as ArrowSchema};
    use futures::StreamExt;
    use lance::dataset::Dataset;
@@ -189,7 +187,7 @@ mod tests {
        let batches = make_test_batches();
        let ds = Dataset::write(batches, "memory://foo", None).await.unwrap();

-        let vector = Some(Float32Array::from_iter_values([0.1, 0.2]));
+        let vector = Float32Array::from_iter_values([0.1, 0.2]);
        let query = Query::new(Arc::new(ds), vector.clone());
        assert_eq!(query.query_vector, vector);

@@ -203,8 +201,8 @@ mod tests {
            .metric_type(Some(MetricType::Cosine))
            .refine_factor(Some(999));

-        assert_eq!(query.query_vector.unwrap(), new_vector);
-        assert_eq!(query.limit.unwrap(), 100);
+        assert_eq!(query.query_vector, new_vector);
+        assert_eq!(query.limit, 100);
        assert_eq!(query.nprobes, 1000);
        assert_eq!(query.use_index, true);
        assert_eq!(query.metric_type, Some(MetricType::Cosine));
@@ -216,7 +214,7 @@ mod tests {
        let batches = make_non_empty_batches();
        let ds = Arc::new(Dataset::write(batches, "memory://foo", None).await.unwrap());

-        let vector = Some(Float32Array::from_iter_values([0.1; 4]));
+        let vector = Float32Array::from_iter_values([0.1; 4]);

        let query = Query::new(ds.clone(), vector.clone());
        let result = query
@@ -246,27 +244,6 @@ mod tests {
        }
    }

-    #[tokio::test]
-    async fn test_execute_no_vector() {
-        // test that it's ok to not specify a query vector (just filter / limit)
-        let batches = make_non_empty_batches();
-        let ds = Arc::new(Dataset::write(batches, "memory://foo", None).await.unwrap());
-
-        let query = Query::new(ds.clone(), None);
-        let result = query
-            .filter(Some("id % 2 == 0".to_string()))
-            .execute()
-            .await;
-        let mut stream = result.expect("should have result");
-        // should only have one batch
-        while let Some(batch) = stream.next().await {
-            let b = batch.expect("should be Ok");            
-            // cast arr into Int32Array
-            let arr: &Int32Array = b["id"].as_primitive();
-            assert!(arr.iter().all(|x| x.unwrap() % 2 == 0));
-        }
-    }
-
    fn make_non_empty_batches() -> impl RecordBatchReader + Send + 'static {
        let vec = Box::new(RandomVector::new().named("vector".to_string()));
        let id = Box::new(IncrementingInt32::new().named("id".to_string()));
--- a/rust/vectordb/src/table.rs
+++ b/rust/vectordb/src/table.rs
@@ -23,7 +23,7 @@ use lance::dataset::cleanup::RemovalStats;
 use lance::dataset::optimize::{
    compact_files, CompactionMetrics, CompactionOptions, IndexRemapperOptions,
 };
-use lance::dataset::{Dataset, UpdateBuilder, WriteParams};
+use lance::dataset::{Dataset, WriteParams};
 use lance::index::DatasetIndexExt;
 use lance::io::object_store::WrappingObjectStore;
 use std::path::Path;
@@ -308,14 +308,10 @@ impl Table {
    /// # Returns
    ///
    /// * A [Query] object.
-    pub fn search(&self, query_vector: Option<Float32Array>) -> Query {
+    pub fn search(&self, query_vector: Float32Array) -> Query {
        Query::new(self.dataset.clone(), query_vector)
    }

-    pub fn filter(&self, expr: String) -> Query {
-        Query::new(self.dataset.clone(), None).filter(Some(expr))
-    }
-
    /// Returns the number of rows in this Table
    pub async fn count_rows(&self) -> Result<usize> {
        Ok(self.dataset.count_rows().await?)
@@ -342,27 +338,6 @@ impl Table {
        Ok(())
    }

-    pub async fn update(
-        &mut self,
-        predicate: Option<&str>,
-        updates: Vec<(&str, &str)>,
-    ) -> Result<()> {
-        let mut builder = UpdateBuilder::new(self.dataset.clone());
-        if let Some(predicate) = predicate {
-            builder = builder.update_where(predicate)?;
-        }
-
-        for (column, value) in updates {
-            builder = builder.set(column, value)?;
-        }
-
-        let operation = builder.build()?;
-        let new_ds = operation.execute().await?;
-        self.dataset = new_ds;
-
-        Ok(())
-    }
-
    /// Remove old versions of the dataset from disk.
    ///
    /// # Arguments
@@ -438,14 +413,11 @@ mod tests {
    use std::sync::Arc;

    use arrow_array::{
-        Array, BooleanArray, Date32Array, FixedSizeListArray, Float32Array, Float64Array,
-        Int32Array, Int64Array, LargeStringArray, RecordBatch, RecordBatchIterator,
-        RecordBatchReader, StringArray, TimestampMillisecondArray, TimestampNanosecondArray,
-        UInt32Array,
+        Array, FixedSizeListArray, Float32Array, Int32Array, RecordBatch, RecordBatchIterator,
+        RecordBatchReader,
    };
    use arrow_data::ArrayDataBuilder;
-    use arrow_schema::{DataType, Field, Schema, TimeUnit};
-    use futures::TryStreamExt;
+    use arrow_schema::{DataType, Field, Schema};
    use lance::dataset::{Dataset, WriteMode};
    use lance::index::vector::pq::PQBuildParams;
    use lance::io::object_store::{ObjectStoreParams, WrappingObjectStore};
@@ -568,272 +540,6 @@ mod tests {
        assert_eq!(table.name, "test");
    }

-    #[tokio::test]
-    async fn test_update_with_predicate() {
-        let tmp_dir = tempdir().unwrap();
-        let dataset_path = tmp_dir.path().join("test.lance");
-        let uri = dataset_path.to_str().unwrap();
-
-        let schema = Arc::new(Schema::new(vec![
-            Field::new("id", DataType::Int32, false),
-            Field::new("name", DataType::Utf8, false),
-        ]));
-
-        let record_batch_iter = RecordBatchIterator::new(
-            vec![RecordBatch::try_new(
-                schema.clone(),
-                vec![
-                    Arc::new(Int32Array::from_iter_values(0..10)),
-                    Arc::new(StringArray::from_iter_values(vec![
-                        "a", "b", "c", "d", "e", "f", "g", "h", "i", "j",
-                    ])),
-                ],
-            )
-            .unwrap()]
-            .into_iter()
-            .map(Ok),
-            schema.clone(),
-        );
-
-        Dataset::write(record_batch_iter, uri, None).await.unwrap();
-        let mut table = Table::open(uri).await.unwrap();
-
-        table
-            .update(Some("id > 5"), vec![("name", "'foo'")])
-            .await
-            .unwrap();
-
-        let ds_after = Dataset::open(uri).await.unwrap();
-        let mut batches = ds_after
-            .scan()
-            .project(&["id", "name"])
-            .unwrap()
-            .try_into_stream()
-            .await
-            .unwrap()
-            .try_collect::<Vec<_>>()
-            .await
-            .unwrap();
-
-        while let Some(batch) = batches.pop() {
-            let ids = batch
-                .column(0)
-                .as_any()
-                .downcast_ref::<Int32Array>()
-                .unwrap()
-                .iter()
-                .collect::<Vec<_>>();
-            let names = batch
-                .column(1)
-                .as_any()
-                .downcast_ref::<StringArray>()
-                .unwrap()
-                .iter()
-                .collect::<Vec<_>>();
-            for (i, name) in names.iter().enumerate() {
-                let id = ids[i].unwrap();
-                let name = name.unwrap();
-                if id > 5 {
-                    assert_eq!(name, "foo");
-                } else {
-                    assert_eq!(name, &format!("{}", (b'a' + id as u8) as char));
-                }
-            }
-        }
-    }
-
-    #[tokio::test]
-    async fn test_update_all_types() {
-        let tmp_dir = tempdir().unwrap();
-        let dataset_path = tmp_dir.path().join("test.lance");
-        let uri = dataset_path.to_str().unwrap();
-
-        let schema = Arc::new(Schema::new(vec![
-            Field::new("int32", DataType::Int32, false),
-            Field::new("int64", DataType::Int64, false),
-            Field::new("uint32", DataType::UInt32, false),
-            Field::new("string", DataType::Utf8, false),
-            Field::new("large_string", DataType::LargeUtf8, false),
-            Field::new("float32", DataType::Float32, false),
-            Field::new("float64", DataType::Float64, false),
-            Field::new("bool", DataType::Boolean, false),
-            Field::new("date32", DataType::Date32, false),
-            Field::new(
-                "timestamp_ns",
-                DataType::Timestamp(TimeUnit::Nanosecond, None),
-                false,
-            ),
-            Field::new(
-                "timestamp_ms",
-                DataType::Timestamp(TimeUnit::Millisecond, None),
-                false,
-            ),
-            Field::new(
-                "vec_f32",
-                DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Float32, true)), 2),
-                false,
-            ),
-            Field::new(
-                "vec_f64",
-                DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Float64, true)), 2),
-                false,
-            ),
-        ]));
-
-        let record_batch_iter = RecordBatchIterator::new(
-            vec![RecordBatch::try_new(
-                schema.clone(),
-                vec![
-                    Arc::new(Int32Array::from_iter_values(0..10)),
-                    Arc::new(Int64Array::from_iter_values(0..10)),
-                    Arc::new(UInt32Array::from_iter_values(0..10)),
-                    Arc::new(StringArray::from_iter_values(vec![
-                        "a", "b", "c", "d", "e", "f", "g", "h", "i", "j",
-                    ])),
-                    Arc::new(LargeStringArray::from_iter_values(vec![
-                        "a", "b", "c", "d", "e", "f", "g", "h", "i", "j",
-                    ])),
-                    Arc::new(Float32Array::from_iter_values(
-                        (0..10).into_iter().map(|i| i as f32),
-                    )),
-                    Arc::new(Float64Array::from_iter_values(
-                        (0..10).into_iter().map(|i| i as f64),
-                    )),
-                    Arc::new(Into::<BooleanArray>::into(vec![
-                        true, false, true, false, true, false, true, false, true, false,
-                    ])),
-                    Arc::new(Date32Array::from_iter_values(0..10)),
-                    Arc::new(TimestampNanosecondArray::from_iter_values(0..10)),
-                    Arc::new(TimestampMillisecondArray::from_iter_values(0..10)),
-                    Arc::new(
-                        create_fixed_size_list(
-                            Float32Array::from_iter_values((0..20).into_iter().map(|i| i as f32)),
-                            2,
-                        )
-                        .unwrap(),
-                    ),
-                    Arc::new(
-                        create_fixed_size_list(
-                            Float64Array::from_iter_values((0..20).into_iter().map(|i| i as f64)),
-                            2,
-                        )
-                        .unwrap(),
-                    ),
-                ],
-            )
-            .unwrap()]
-            .into_iter()
-            .map(Ok),
-            schema.clone(),
-        );
-
-        Dataset::write(record_batch_iter, uri, None).await.unwrap();
-        let mut table = Table::open(uri).await.unwrap();
-
-        // check it can do update for each type
-        let updates: Vec<(&str, &str)> = vec![
-            ("string", "'foo'"),
-            ("large_string", "'large_foo'"),
-            ("int32", "1"),
-            ("int64", "1"),
-            ("uint32", "1"),
-            ("float32", "1.0"),
-            ("float64", "1.0"),
-            ("bool", "true"),
-            ("date32", "1"),
-            ("timestamp_ns", "1"),
-            ("timestamp_ms", "1"),
-            ("vec_f32", "[1.0, 1.0]"),
-            ("vec_f64", "[1.0, 1.0]"),
-        ];
-
-        // for (column, value) in test_cases {
-        table.update(None, updates).await.unwrap();
-
-        let ds_after = Dataset::open(uri).await.unwrap();
-        let mut batches = ds_after
-            .scan()
-            .project(&[
-                "string",
-                "large_string",
-                "int32",
-                "int64",
-                "uint32",
-                "float32",
-                "float64",
-                "bool",
-                "date32",
-                "timestamp_ns",
-                "timestamp_ms",
-                "vec_f32",
-                "vec_f64",
-            ])
-            .unwrap()
-            .try_into_stream()
-            .await
-            .unwrap()
-            .try_collect::<Vec<_>>()
-            .await
-            .unwrap();
-        let batch = batches.pop().unwrap();
-
-        macro_rules! assert_column {
-            ($column:expr, $array_type:ty, $expected:expr) => {
-                let array = $column
-                    .as_any()
-                    .downcast_ref::<$array_type>()
-                    .unwrap()
-                    .iter()
-                    .collect::<Vec<_>>();
-                for v in array {
-                    assert_eq!(v, Some($expected));
-                }
-            };
-        }
-
-        assert_column!(batch.column(0), StringArray, "foo");
-        assert_column!(batch.column(1), LargeStringArray, "large_foo");
-        assert_column!(batch.column(2), Int32Array, 1);
-        assert_column!(batch.column(3), Int64Array, 1);
-        assert_column!(batch.column(4), UInt32Array, 1);
-        assert_column!(batch.column(5), Float32Array, 1.0);
-        assert_column!(batch.column(6), Float64Array, 1.0);
-        assert_column!(batch.column(7), BooleanArray, true);
-        assert_column!(batch.column(8), Date32Array, 1);
-        assert_column!(batch.column(9), TimestampNanosecondArray, 1);
-        assert_column!(batch.column(10), TimestampMillisecondArray, 1);
-
-        let array = batch
-            .column(11)
-            .as_any()
-            .downcast_ref::<FixedSizeListArray>()
-            .unwrap()
-            .iter()
-            .collect::<Vec<_>>();
-        for v in array {
-            let v = v.unwrap();
-            let f32array = v.as_any().downcast_ref::<Float32Array>().unwrap();
-            for v in f32array {
-                assert_eq!(v, Some(1.0));
-            }
-        }
-
-        let array = batch
-            .column(12)
-            .as_any()
-            .downcast_ref::<FixedSizeListArray>()
-            .unwrap()
-            .iter()
-            .collect::<Vec<_>>();
-        for v in array {
-            let v = v.unwrap();
-            let f64array = v.as_any().downcast_ref::<Float64Array>().unwrap();
-            for v in f64array {
-                assert_eq!(v, Some(1.0));
-            }
-        }
-    }
-
    #[tokio::test]
    async fn test_search() {
        let tmp_dir = tempdir().unwrap();
@@ -848,8 +554,8 @@ mod tests {
        let table = Table::open(uri).await.unwrap();

        let vector = Float32Array::from_iter_values([0.1, 0.2]);
-        let query = table.search(Some(vector.clone()));
-        assert_eq!(vector, query.query_vector.unwrap());
+        let query = table.search(vector.clone());
+        assert_eq!(vector, query.query_vector);
    }

    #[derive(Default, Debug)]
Author	SHA1	Message	Date
qzhu	8e25e0c7f0	reformatted	2023-12-07 12:08:05 -08:00
qzhu	5f989e86d2	SaaS python SDK doc	2023-12-07 12:01:03 -08:00