linter and clippy

add tests for rust
it passes version for all read calls
2025-12-23 05:19:58 +00:00 · 2024-11-21 07:11:35 -05:00 · 2024-11-21 06:58:51 -05:00 · 2024-11-20 11:46:04 -05:00 · 2024-11-20 10:14:39 -05:00 · 2024-11-19 17:24:28 -05:00
38 changed files with 678 additions and 135 deletions
--- a/.bumpversion.toml
+++ b/.bumpversion.toml
@@ -1,5 +1,5 @@
 [tool.bumpversion]
-current_version = "0.13.0-beta.2"
+current_version = "0.13.0"
 parse = """(?x)
    (?P<major>0|[1-9]\\d*)\\.
    (?P<minor>0|[1-9]\\d*)\\.
--- a/.github/workflows/npm-publish.yml
+++ b/.github/workflows/npm-publish.yml
@@ -460,7 +460,7 @@ jobs:

  release:
    name: vectordb NPM Publish
-    needs: [node, node-macos, node-linux, node-windows, node-windows-arm64]
+    needs: [node, node-macos, node-linux, node-windows]
    runs-on: ubuntu-latest
    # Only runs on tags that matches the make-release action
    if: startsWith(github.ref, 'refs/tags/v')
@@ -500,7 +500,7 @@ jobs:

  release-nodejs:
    name: lancedb NPM Publish
-    needs: [nodejs-macos, nodejs-linux, nodejs-windows, nodejs-windows-arm64]
+    needs: [nodejs-macos, nodejs-linux, nodejs-windows]
    runs-on: ubuntu-latest
    # Only runs on tags that matches the make-release action
    if: startsWith(github.ref, 'refs/tags/v')
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -18,18 +18,18 @@ repository = "https://github.com/lancedb/lancedb"
 description = "Serverless, low-latency vector database for AI applications"
 keywords = ["lancedb", "lance", "database", "vector", "search"]
 categories = ["database-implementations"]
-rust-version = "1.80.0" # TODO: lower this once we upgrade Lance again.
+rust-version = "1.80.0"                                                     # TODO: lower this once we upgrade Lance again.

 [workspace.dependencies]
-lance = { "version" = "=0.19.2", "features" = [
+lance = { "version" = "=0.19.3", "features" = [
    "dynamodb",
-]}
-lance-index = "=0.19.2"
-lance-linalg = "=0.19.2"
-lance-table = "=0.19.2"
-lance-testing = "=0.19.2"
-lance-datafusion = "=0.19.2"
-lance-encoding = "=0.19.2"
+], git = "https://github.com/lancedb/lance.git", tag = "v0.19.3-beta.1" }
+lance-index = { version = "=0.19.3", git = "https://github.com/lancedb/lance.git", tag = "v0.19.3-beta.1" }
+lance-linalg = { version = "=0.19.3", git = "https://github.com/lancedb/lance.git", tag = "v0.19.3-beta.1" }
+lance-table = { version = "=0.19.3", git = "https://github.com/lancedb/lance.git", tag = "v0.19.3-beta.1" }
+lance-testing = { version = "=0.19.3", git = "https://github.com/lancedb/lance.git", tag = "v0.19.3-beta.1" }
+lance-datafusion = { version = "=0.19.3", git = "https://github.com/lancedb/lance.git", tag = "v0.19.3-beta.1" }
+lance-encoding = { version = "=0.19.3", git = "https://github.com/lancedb/lance.git", tag = "v0.19.3-beta.1" }
 # Note that this one does not include pyarrow
 arrow = { version = "52.2", optional = false }
 arrow-array = "52.2"
--- a/docs/package-lock.json
+++ b/docs/package-lock.json
@@ -19,7 +19,7 @@
    },
    "../node": {
      "name": "vectordb",
-      "version": "0.4.6",
+      "version": "0.12.0",
      "cpu": [
        "x64",
        "arm64"
@@ -31,9 +31,7 @@
        "win32"
      ],
      "dependencies": {
-        "@apache-arrow/ts": "^14.0.2",
        "@neon-rs/load": "^0.0.74",
-        "apache-arrow": "^14.0.2",
        "axios": "^1.4.0"
      },
      "devDependencies": {
@@ -46,6 +44,7 @@
        "@types/temp": "^0.9.1",
        "@types/uuid": "^9.0.3",
        "@typescript-eslint/eslint-plugin": "^5.59.1",
+        "apache-arrow-old": "npm:apache-arrow@13.0.0",
        "cargo-cp-artifact": "^0.1",
        "chai": "^4.3.7",
        "chai-as-promised": "^7.1.1",
@@ -62,15 +61,19 @@
        "ts-node-dev": "^2.0.0",
        "typedoc": "^0.24.7",
        "typedoc-plugin-markdown": "^3.15.3",
-        "typescript": "*",
+        "typescript": "^5.1.0",
        "uuid": "^9.0.0"
      },
      "optionalDependencies": {
-        "@lancedb/vectordb-darwin-arm64": "0.4.6",
-        "@lancedb/vectordb-darwin-x64": "0.4.6",
-        "@lancedb/vectordb-linux-arm64-gnu": "0.4.6",
-        "@lancedb/vectordb-linux-x64-gnu": "0.4.6",
-        "@lancedb/vectordb-win32-x64-msvc": "0.4.6"
+        "@lancedb/vectordb-darwin-arm64": "0.12.0",
+        "@lancedb/vectordb-darwin-x64": "0.12.0",
+        "@lancedb/vectordb-linux-arm64-gnu": "0.12.0",
+        "@lancedb/vectordb-linux-x64-gnu": "0.12.0",
+        "@lancedb/vectordb-win32-x64-msvc": "0.12.0"
+      },
+      "peerDependencies": {
+        "@apache-arrow/ts": "^14.0.2",
+        "apache-arrow": "^14.0.2"
      }
    },
    "../node/node_modules/apache-arrow": {
--- a/docs/src/embeddings/available_embedding_models/text_embedding_functions/voyageai_embedding.md
+++ b/docs/src/embeddings/available_embedding_models/text_embedding_functions/voyageai_embedding.md
@@ -20,7 +20,7 @@ Supported parameters (to be passed in `create` method) are:

 | Parameter | Type | Default Value | Description |
 |---|---|--------|---------|
-| `name` | `str` | `"voyage-3"` | The model ID of the model to use. Supported base models for Text Embeddings: voyage-3, voyage-3-lite, voyage-finance-2, voyage-multilingual-2, voyage-law-2, voyage-code-2 |
+| `name` | `str` | `None` | The model ID of the model to use. Supported base models for Text Embeddings: voyage-3, voyage-3-lite, voyage-finance-2, voyage-multilingual-2, voyage-law-2, voyage-code-2 |
 | `input_type` | `str` | `None` | Type of the input text. Default to None. Other options: query, document. |
 | `truncation` | `bool` | `True` | Whether to truncate the input texts to fit within the context length. |

--- a/docs/src/embeddings/default_embedding_functions.md
+++ b/docs/src/embeddings/default_embedding_functions.md
@@ -53,6 +53,7 @@ These functions are registered by default to handle text embeddings.
 | [**Jina Embeddings**](available_embedding_models/text_embedding_functions/jina_embedding.md "jina") | 🔗 World-class embedding models to improve your search and RAG systems. You will need **jina api key**. | [<img src="https://raw.githubusercontent.com/lancedb/assets/main/docs/assets/logos/jina.png" alt="Jina Icon" width="90" height="35">](available_embedding_models/text_embedding_functions/jina_embedding.md) |                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                               
 | [ **AWS Bedrock Functions**](available_embedding_models/text_embedding_functions/aws_bedrock_embedding.md "bedrock-text") | ☁️ AWS Bedrock supports multiple base models for generating text embeddings. You need to setup the AWS credentials to use this embedding function. | [<img src="https://raw.githubusercontent.com/lancedb/assets/main/docs/assets/logos/aws_bedrock.png" alt="AWS Bedrock Icon" width="120" height="35">](available_embedding_models/text_embedding_functions/aws_bedrock_embedding.md) |                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                               
 | [**IBM Watsonx.ai**](available_embedding_models/text_embedding_functions/ibm_watsonx_ai_embedding.md "watsonx") | 💡 Generate text embeddings using IBM's watsonx.ai platform. **Note**: watsonx.ai library is an optional dependency. | [<img src="https://raw.githubusercontent.com/lancedb/assets/main/docs/assets/logos/watsonx.png" alt="Watsonx Icon" width="140" height="35">](available_embedding_models/text_embedding_functions/ibm_watsonx_ai_embedding.md) |
+| [**VoyageAI Embeddings**](available_embedding_models/text_embedding_functions/voyageai_embedding.md "voyageai") | 🌕 Voyage AI provides cutting-edge embedding models and rerankers. This will help you get started with **VoyageAI** embedding models using LanceDB. Using the VoyageAI API requires the `voyageai` package; install it via `pip`. | [<img src="https://www.voyageai.com/logo.svg" alt="VoyageAI Icon" width="140" height="35">](available_embedding_models/text_embedding_functions/voyageai_embedding.md) |



@@ -66,6 +67,7 @@ These functions are registered by default to handle text embeddings.
 [jina-key]: "jina"
 [aws-key]: "bedrock-text"
 [watsonx-key]: "watsonx"
+[voyageai-key]: "voyageai"


 ## Multi-modal Embedding Functions🖼️ 
--- a/docs/src/fts.md
+++ b/docs/src/fts.md
@@ -160,3 +160,32 @@ To search for a phrase, the index must be created with `with_position=True`:
 table.create_fts_index("text", use_tantivy=False, with_position=True)
 ```
 This will allow you to search for phrases, but it will also significantly increase the index size and indexing time.
+
+
+## Incremental indexing
+
+LanceDB supports incremental indexing, which means you can add new records to the table without reindexing the entire table.
+
+This can make indexing more efficient, especially when the table is large and the number of new records is relatively small.
+
+=== "Python"
+
+    ```python
+    table.add([{"vector": [3.1, 4.1], "text": "Frodo was a happy puppy"}])
+    table.optimize()
+    ```
+
+=== "TypeScript"
+
+    ```typescript
+    await tbl.add([{ vector: [3.1, 4.1], text: "Frodo was a happy puppy" }]);
+    await tbl.optimize();
+    ```
+
+=== "Rust"
+
+    ```rust
+    let more_data: Box<dyn RecordBatchReader + Send> = create_some_records()?;
+    tbl.add(more_data).execute().await?;
+    tbl.optimize(OptimizeAction::All).execute().await?;
+    ```
--- a/docs/src/guides/tables.md
+++ b/docs/src/guides/tables.md
@@ -274,7 +274,7 @@ table = db.create_table(table_name, schema=Content)

 Sometimes your data model may contain nested objects.
 For example, you may want to store the document string
-and the document soure name as a nested Document object:
+and the document source name as a nested Document object:

 ```python
 class Document(BaseModel):
@@ -466,7 +466,7 @@ You can create an empty table for scenarios where you want to add data to the ta

 ## Adding to a table

-After a table has been created, you can always add more data to it usind the `add` method
+After a table has been created, you can always add more data to it using the `add` method

 === "Python"
    You can add any of the valid data structures accepted by LanceDB table, i.e, `dict`, `list[dict]`, `pd.DataFrame`, or `Iterator[pa.RecordBatch]`. Below are some examples.
@@ -535,7 +535,7 @@ After a table has been created, you can always add more data to it usind the `ad
    ```

    ??? "Ingesting Pydantic models with LanceDB embedding API"
-        When using LanceDB's embedding API, you can add Pydantic models directly to the table. LanceDB will automatically convert the `vector` field to a vector before adding it to the table. You need to specify the default value of `vector` feild as None to allow LanceDB to automatically vectorize the data.
+        When using LanceDB's embedding API, you can add Pydantic models directly to the table. LanceDB will automatically convert the `vector` field to a vector before adding it to the table. You need to specify the default value of `vector` field as None to allow LanceDB to automatically vectorize the data.

        ```python
        import lancedb
@@ -880,4 +880,4 @@ There are three possible settings for `read_consistency_interval`:

 Learn the best practices on creating an ANN index and getting the most out of it.

-[^1]: The `vectordb` package is a legacy package that is  deprecated in favor of `@lancedb/lancedb`.  The `vectordb` package will continue to receive bug fixes and security updates until September 2024.  We recommend all new projects use `@lancedb/lancedb`.  See the [migration guide](migration.md) for more information.
+[^1]: The `vectordb` package is a legacy package that is  deprecated in favor of `@lancedb/lancedb`.  The `vectordb` package will continue to receive bug fixes and security updates until September 2024.  We recommend all new projects use `@lancedb/lancedb`.  See the [migration guide](../migration.md) for more information.
--- a/docs/src/reranking/index.md
+++ b/docs/src/reranking/index.md
@@ -9,6 +9,7 @@ LanceDB comes with some built-in rerankers. Some of the rerankers that are avail
 | `CrossEncoderReranker` | Uses a cross-encoder model to rerank search results | Vector, FTS, Hybrid |
 | `ColbertReranker` | Uses a colbert model to rerank search results | Vector, FTS, Hybrid |
 | `OpenaiReranker`(Experimental) | Uses OpenAI's chat model to rerank search results | Vector, FTS, Hybrid |
+| `VoyageAIReranker` | Uses voyageai Reranker API to rerank results | Vector, FTS, Hybrid |


 ## Using a Reranker
@@ -73,6 +74,7 @@ LanceDB comes with some built-in rerankers. Here are some of the rerankers that
 - [Jina Reranker](./jina.md)
 - [AnswerDotAI Rerankers](./answerdotai.md)
 - [Reciprocal Rank Fusion Reranker](./rrf.md)
+- [VoyageAI Reranker](./voyageai.md)

 ## Creating Custom Rerankers

--- a/java/core/pom.xml
+++ b/java/core/pom.xml
@@ -8,7 +8,7 @@
    <parent>
        <groupId>com.lancedb</groupId>
        <artifactId>lancedb-parent</artifactId>
-        <version>0.13.0-beta.2</version>
+        <version>0.13.0-final.0</version>
        <relativePath>../pom.xml</relativePath>
    </parent>

--- a/java/pom.xml
+++ b/java/pom.xml
@@ -6,7 +6,7 @@

    <groupId>com.lancedb</groupId>
    <artifactId>lancedb-parent</artifactId>
-    <version>0.13.0-beta.2</version>
+    <version>0.13.0-final.0</version>
    <packaging>pom</packaging>

    <name>LanceDB Parent</name>
--- a/node/package-lock.json
+++ b/node/package-lock.json
@@ -1,12 +1,12 @@
 {
  "name": "vectordb",
-  "version": "0.13.0-beta.2",
+  "version": "0.13.0",
  "lockfileVersion": 3,
  "requires": true,
  "packages": {
    "": {
      "name": "vectordb",
-      "version": "0.13.0-beta.2",
+      "version": "0.13.0",
      "cpu": [
        "x64",
        "arm64"
@@ -52,12 +52,12 @@
        "uuid": "^9.0.0"
      },
      "optionalDependencies": {
-        "@lancedb/vectordb-darwin-arm64": "0.13.0-beta.2",
-        "@lancedb/vectordb-darwin-x64": "0.13.0-beta.2",
-        "@lancedb/vectordb-linux-arm64-gnu": "0.13.0-beta.2",
-        "@lancedb/vectordb-linux-x64-gnu": "0.13.0-beta.2",
-        "@lancedb/vectordb-win32-arm64-msvc": "0.13.0-beta.2",
-        "@lancedb/vectordb-win32-x64-msvc": "0.13.0-beta.2"
+        "@lancedb/vectordb-darwin-arm64": "0.13.0",
+        "@lancedb/vectordb-darwin-x64": "0.13.0",
+        "@lancedb/vectordb-linux-arm64-gnu": "0.13.0",
+        "@lancedb/vectordb-linux-x64-gnu": "0.13.0",
+        "@lancedb/vectordb-win32-arm64-msvc": "0.13.0",
+        "@lancedb/vectordb-win32-x64-msvc": "0.13.0"
      },
      "peerDependencies": {
        "@apache-arrow/ts": "^14.0.2",
@@ -327,6 +327,66 @@
        "@jridgewell/sourcemap-codec": "^1.4.10"
      }
    },
+    "node_modules/@lancedb/vectordb-darwin-arm64": {
+      "version": "0.13.0",
+      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.13.0.tgz",
+      "integrity": "sha512-8hdcjkRmgrdQYf1jN+DyZae40LIv8UUfnWy70Uid5qy63sSvRW/+MvIdqIPFr9QlLUXmpyyQuX0y3bZhUR99cQ==",
+      "cpu": [
+        "arm64"
+      ],
+      "optional": true,
+      "os": [
+        "darwin"
+      ]
+    },
+    "node_modules/@lancedb/vectordb-darwin-x64": {
+      "version": "0.13.0",
+      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.13.0.tgz",
+      "integrity": "sha512-fWzAY4l5SQtNfMYh80v+M66ugZHhdxbkpk5mNEv6Zsug3DL6kRj3Uv31/i0wgzY6F5G3LUlbjZerN+eTnDLwOw==",
+      "cpu": [
+        "x64"
+      ],
+      "optional": true,
+      "os": [
+        "darwin"
+      ]
+    },
+    "node_modules/@lancedb/vectordb-linux-arm64-gnu": {
+      "version": "0.13.0",
+      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.13.0.tgz",
+      "integrity": "sha512-ltwAT9baOSuR5YiGykQXPC8/HGYF13vpI47qxhP9yfgiz9pA8EUn8p8YrBRzq7J4DIZ4b8JSVDXQnMIqEtB4Kg==",
+      "cpu": [
+        "arm64"
+      ],
+      "optional": true,
+      "os": [
+        "linux"
+      ]
+    },
+    "node_modules/@lancedb/vectordb-linux-x64-gnu": {
+      "version": "0.13.0",
+      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.13.0.tgz",
+      "integrity": "sha512-MiT/RBlMPGGRh7BX+MXwRuNiiUnKmuDcHH8nm88IH28T7TQxXIbA9w6UpSg5m9f3DgKQI2K8oLi29oKIB8ZwDQ==",
+      "cpu": [
+        "x64"
+      ],
+      "optional": true,
+      "os": [
+        "linux"
+      ]
+    },
+    "node_modules/@lancedb/vectordb-win32-x64-msvc": {
+      "version": "0.13.0",
+      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.13.0.tgz",
+      "integrity": "sha512-SovP/hwWYLJIy65DKbVuXlBPTb/nwvVpTO6dh9zRch+L5ek6JmVAkwsfeTS2p5bMa8VPujsCXYUAVuCDEJU8wg==",
+      "cpu": [
+        "x64"
+      ],
+      "optional": true,
+      "os": [
+        "win32"
+      ]
+    },
    "node_modules/@neon-rs/cli": {
      "version": "0.0.160",
      "resolved": "https://registry.npmjs.org/@neon-rs/cli/-/cli-0.0.160.tgz",
@@ -1441,9 +1501,9 @@
      "dev": true
    },
    "node_modules/cross-spawn": {
-      "version": "7.0.3",
-      "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.3.tgz",
-      "integrity": "sha512-iRDPJKUPVEND7dHPO8rkbOnPpyDygcDFtWjpeWNCgy8WP2rXcxXL8TskReQl6OrB2G7+UJrags1q15Fudc7G6w==",
+      "version": "7.0.6",
+      "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.6.tgz",
+      "integrity": "sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA==",
      "dev": true,
      "dependencies": {
        "path-key": "^3.1.0",
--- a/node/package.json
+++ b/node/package.json
@@ -1,6 +1,6 @@
 {
  "name": "vectordb",
-  "version": "0.13.0-beta.2",
+  "version": "0.13.0",
  "description": " Serverless, low-latency vector database for AI applications",
  "main": "dist/index.js",
  "types": "dist/index.d.ts",
@@ -89,11 +89,11 @@
    }
  },
  "optionalDependencies": {
-    "@lancedb/vectordb-darwin-arm64": "0.13.0-beta.2",
-    "@lancedb/vectordb-darwin-x64": "0.13.0-beta.2",
-    "@lancedb/vectordb-linux-arm64-gnu": "0.13.0-beta.2",
-    "@lancedb/vectordb-linux-x64-gnu": "0.13.0-beta.2",
-    "@lancedb/vectordb-win32-x64-msvc": "0.13.0-beta.2",
-    "@lancedb/vectordb-win32-arm64-msvc": "0.13.0-beta.2"
+    "@lancedb/vectordb-darwin-arm64": "0.13.0",
+    "@lancedb/vectordb-darwin-x64": "0.13.0",
+    "@lancedb/vectordb-linux-arm64-gnu": "0.13.0",
+    "@lancedb/vectordb-linux-x64-gnu": "0.13.0",
+    "@lancedb/vectordb-win32-x64-msvc": "0.13.0",
+    "@lancedb/vectordb-win32-arm64-msvc": "0.13.0"
  }
 }
--- a/nodejs/Cargo.toml
+++ b/nodejs/Cargo.toml
@@ -1,7 +1,7 @@
 [package]
 name = "lancedb-nodejs"
 edition.workspace = true
-version = "0.13.0-beta.2"
+version = "0.13.0"
 license.workspace = true
 description.workspace = true
 repository.workspace = true
--- a/nodejs/test/table.test.ts
+++ b/nodejs/test/table.test.ts
@@ -477,6 +477,54 @@ describe("When creating an index", () => {
    expect(rst.numRows).toBe(1);
  });

+  it("should create and search IVF_HNSW indices", async () => {
+    await tbl.createIndex("vec", {
+      config: Index.hnswSq(),
+    });
+
+    // the index directory should contain exactly one entry after indexing
+    const indexDir = path.join(tmpDir.name, "test.lance", "_indices");
+    expect(fs.readdirSync(indexDir)).toHaveLength(1);
+    const indices = await tbl.listIndices();
+    expect(indices.length).toBe(1);
+    expect(indices[0]).toEqual({
+      name: "vec_idx",
+      indexType: "IvfHnswSq",
+      columns: ["vec"],
+    });
+
+    // Search without specifying the column, using the "dot" distance type
+    let rst = await tbl
+      .query()
+      .limit(2)
+      .nearestTo(queryVec)
+      .distanceType("dot")
+      .toArrow();
+    expect(rst.numRows).toBe(2);
+
+    // Search using `vectorSearch`
+    rst = await tbl.vectorSearch(queryVec).limit(2).toArrow();
+    expect(rst.numRows).toBe(2);
+
+    // Search with the column given explicitly; results must match the above
+    const rst2 = await tbl
+      .query()
+      .limit(2)
+      .nearestTo(queryVec)
+      .column("vec")
+      .toArrow();
+    expect(rst2.numRows).toBe(2);
+    expect(rst.toString()).toEqual(rst2.toString());
+
+    // test offset: limit(2) with offset(1) is expected to return a single row
+    rst = await tbl.query().limit(2).offset(1).nearestTo(queryVec).toArrow();
+    expect(rst.numRows).toBe(1);
+
+    // test ef: setting the candidate count should still return `limit` rows
+    rst = await tbl.query().limit(2).nearestTo(queryVec).ef(100).toArrow();
+    expect(rst.numRows).toBe(2);
+  });
+
  it("should be able to query unindexed data", async () => {
    await tbl.createIndex("vec");
    await tbl.add([
--- a/nodejs/lancedb/query.ts
+++ b/nodejs/lancedb/query.ts
@@ -385,6 +385,20 @@ export class VectorQuery extends QueryBase<NativeVectorQuery> {
    return this;
  }

+  /**
+   * Set the number of candidates to consider during the search.
+   *
+   * Only used when the vector column has an HNSW index; ignored otherwise.
+   * Larger values increase recall but also latency. Default: 1.5*limit.
+   *
+   * @param ef - the number of candidates to consider
+   * @returns this query, so that calls can be chained
+   */
+  ef(ef: number): VectorQuery {
+    super.doCall((inner) => inner.ef(ef));
+    return this;
+  }
+
  /**
   * Set the vector column to query
   *
--- a/nodejs/npm/darwin-arm64/package.json
+++ b/nodejs/npm/darwin-arm64/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-darwin-arm64",
-	"version": "0.13.0-beta.2",
+	"version": "0.13.0",
 	"os": ["darwin"],
 	"cpu": ["arm64"],
 	"main": "lancedb.darwin-arm64.node",
--- a/nodejs/npm/darwin-x64/package.json
+++ b/nodejs/npm/darwin-x64/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-darwin-x64",
-	"version": "0.13.0-beta.2",
+	"version": "0.13.0",
 	"os": ["darwin"],
 	"cpu": ["x64"],
 	"main": "lancedb.darwin-x64.node",
--- a/nodejs/npm/linux-arm64-gnu/package.json
+++ b/nodejs/npm/linux-arm64-gnu/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-linux-arm64-gnu",
-	"version": "0.13.0-beta.2",
+	"version": "0.13.0",
 	"os": ["linux"],
 	"cpu": ["arm64"],
 	"main": "lancedb.linux-arm64-gnu.node",
--- a/nodejs/npm/linux-x64-gnu/package.json
+++ b/nodejs/npm/linux-x64-gnu/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-linux-x64-gnu",
-	"version": "0.13.0-beta.2",
+	"version": "0.13.0",
 	"os": ["linux"],
 	"cpu": ["x64"],
 	"main": "lancedb.linux-x64-gnu.node",
--- a/nodejs/npm/win32-arm64-msvc/package.json
+++ b/nodejs/npm/win32-arm64-msvc/package.json
@@ -1,6 +1,6 @@
 {
  "name": "@lancedb/lancedb-win32-arm64-msvc",
-  "version": "0.13.0-beta.2",
+  "version": "0.13.0",
  "os": [
    "win32"
  ],
--- a/nodejs/npm/win32-x64-msvc/package.json
+++ b/nodejs/npm/win32-x64-msvc/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-win32-x64-msvc",
-	"version": "0.13.0-beta.2",
+	"version": "0.13.0",
 	"os": ["win32"],
 	"cpu": ["x64"],
 	"main": "lancedb.win32-x64-msvc.node",
--- a/nodejs/package-lock.json
+++ b/nodejs/package-lock.json
@@ -1,12 +1,12 @@
 {
  "name": "@lancedb/lancedb",
-  "version": "0.13.0-beta.1",
+  "version": "0.13.0",
  "lockfileVersion": 3,
  "requires": true,
  "packages": {
    "": {
      "name": "@lancedb/lancedb",
-      "version": "0.13.0-beta.1",
+      "version": "0.13.0",
      "cpu": [
        "x64",
        "arm64"
@@ -6052,9 +6052,9 @@
      }
    },
    "node_modules/cross-spawn": {
-      "version": "7.0.3",
-      "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.3.tgz",
-      "integrity": "sha512-iRDPJKUPVEND7dHPO8rkbOnPpyDygcDFtWjpeWNCgy8WP2rXcxXL8TskReQl6OrB2G7+UJrags1q15Fudc7G6w==",
+      "version": "7.0.6",
+      "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.6.tgz",
+      "integrity": "sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA==",
      "devOptional": true,
      "dependencies": {
        "path-key": "^3.1.0",
--- a/nodejs/package.json
+++ b/nodejs/package.json
@@ -10,7 +10,7 @@
    "vector database",
    "ann"
  ],
-  "version": "0.13.0-beta.2",
+  "version": "0.13.0",
  "main": "dist/index.js",
  "exports": {
    ".": "./dist/index.js",
--- a/nodejs/src/query.rs
+++ b/nodejs/src/query.rs
@@ -167,6 +167,11 @@ impl VectorQuery {
        self.inner = self.inner.clone().nprobes(nprobe as usize);
    }

+    #[napi]
+    pub fn ef(&mut self, ef: u32) {
+        self.inner = self.inner.clone().ef(ef as usize);
+    }
+
    #[napi]
    pub fn bypass_vector_index(&mut self) {
        self.inner = self.inner.clone().bypass_vector_index()
--- a/python/Cargo.toml
+++ b/python/Cargo.toml
@@ -15,7 +15,7 @@ crate-type = ["cdylib"]

 [dependencies]
 arrow = { version = "52.1", features = ["pyarrow"] }
-lancedb = { path = "../rust/lancedb" }
+lancedb = { path = "../rust/lancedb", default-features = false }
 env_logger.workspace = true
 pyo3 = { version = "0.21", features = ["extension-module", "abi3-py38", "gil-refs"] }
 # Using this fork for now: https://github.com/awestlake87/pyo3-asyncio/issues/119
@@ -33,6 +33,11 @@ pyo3-build-config = { version = "0.20.3", features = [
 ] }

 [features]
-default = ["remote"]
+default = ["default-tls", "remote"]
 fp16kernels = ["lancedb/fp16kernels"]
 remote = ["lancedb/remote"]
+
+# TLS
+default-tls = ["lancedb/default-tls"]
+native-tls = ["lancedb/native-tls"]
+rustls-tls = ["lancedb/rustls-tls"]
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -4,7 +4,7 @@ name = "lancedb"
 dependencies = [
    "deprecation",
    "nest-asyncio~=1.0",
-    "pylance==0.19.2",
+    "pylance==0.19.3b1",
    "tqdm>=4.27.0",
    "pydantic>=1.10",
    "packaging",
--- a/python/python/lancedb/query.py
+++ b/python/python/lancedb/query.py
@@ -131,6 +131,8 @@ class Query(pydantic.BaseModel):

    fast_search: bool = False

+    ef: Optional[int] = None
+

 class LanceQueryBuilder(ABC):
    """An abstract query builder. Subclasses are defined for vector search,
@@ -257,6 +259,7 @@ class LanceQueryBuilder(ABC):
        self._with_row_id = False
        self._vector = None
        self._text = None
+        self._ef = None

    @deprecation.deprecated(
        deprecated_in="0.3.1",
@@ -638,6 +641,28 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
        self._nprobes = nprobes
        return self

+    def ef(self, ef: int) -> LanceVectorQueryBuilder:
+        """Set the number of candidates to consider during search.
+
+        Higher values will yield better recall (more likely to find vectors if
+        they exist) at the expense of latency.
+
+        This only applies to HNSW-based indices.
+        The default value is 1.5 * limit.
+
+        Parameters
+        ----------
+        ef: int
+            The number of candidates to consider during search.
+
+        Returns
+        -------
+        LanceVectorQueryBuilder
+            The LanceVectorQueryBuilder object (self), to allow chaining.
+        """
+        self._ef = ef
+        return self
+
    def refine_factor(self, refine_factor: int) -> LanceVectorQueryBuilder:
        """Set the refine factor to use, increasing the number of vectors sampled.

@@ -700,6 +725,7 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
            with_row_id=self._with_row_id,
            offset=self._offset,
            fast_search=self._fast_search,
+            ef=self._ef,
        )
        result_set = self._table._execute_query(query, batch_size)
        if self._reranker is not None:
@@ -1071,6 +1097,8 @@ class LanceHybridQueryBuilder(LanceQueryBuilder):
            self._vector_query.nprobes(self._nprobes)
        if self._refine_factor:
            self._vector_query.refine_factor(self._refine_factor)
+        if self._ef:
+            self._vector_query.ef(self._ef)

        with ThreadPoolExecutor() as executor:
            fts_future = executor.submit(self._fts_query.with_row_id(True).to_arrow)
@@ -1197,6 +1225,29 @@ class LanceHybridQueryBuilder(LanceQueryBuilder):
        self._nprobes = nprobes
        return self

+    def ef(self, ef: int) -> LanceHybridQueryBuilder:
+        """
+        Set the number of candidates to consider during search.
+
+        Higher values will yield better recall (more likely to find vectors if
+        they exist) at the expense of latency.
+
+        This only applies to HNSW-based indices; the value is passed on to the
+        underlying vector query. The default value is 1.5 * limit.
+
+        Parameters
+        ----------
+        ef: int
+            The number of candidates to consider during search.
+
+        Returns
+        -------
+        LanceHybridQueryBuilder
+            The LanceHybridQueryBuilder object.
+        """
+        self._ef = ef
+        return self
+
    def metric(self, metric: Literal["L2", "cosine", "dot"]) -> LanceHybridQueryBuilder:
        """Set the distance metric to use.

@@ -1495,7 +1546,8 @@ class AsyncQuery(AsyncQueryBase):
        return pa.array(vec)

    def nearest_to(
-        self, query_vector: Optional[Union[VEC, Tuple, List[VEC]]] = None
+        self,
+        query_vector: Union[VEC, Tuple, List[VEC]],
    ) -> AsyncVectorQuery:
        """
        Find the nearest vectors to the given query vector.
@@ -1542,6 +1594,9 @@ class AsyncQuery(AsyncQueryBase):
        will be added to the results.  This column will contain the index of the
        query vector that the result is nearest to.
        """
+        if query_vector is None:
+            raise ValueError("query_vector can not be None")
+
        if (
            isinstance(query_vector, list)
            and len(query_vector) > 0
@@ -1618,7 +1673,7 @@ class AsyncVectorQuery(AsyncQueryBase):
        """
        Set the number of partitions to search (probe)

-        This argument is only used when the vector column has an IVF PQ index.
+        This argument is only used when the vector column has an IVF-based index.
        If there is no index then this value is ignored.

        The IVF stage of IVF PQ divides the input into partitions (clusters) of
@@ -1640,6 +1695,21 @@ class AsyncVectorQuery(AsyncQueryBase):
        self._inner.nprobes(nprobes)
        return self

+    def ef(self, ef: int) -> AsyncVectorQuery:
+        """
+        Set the number of candidates to consider during search.
+
+        This argument is only used when the vector column has an HNSW index.
+        If there is no index then this value is ignored.
+
+        Increasing this value will increase the recall of your query but will also
+        increase its latency.  The default value is 1.5 * limit, which is good for
+        many cases, but the best value depends on your data and the recall you need.
+        Returns this query object so that calls can be chained.
+        """
+        self._inner.ef(ef)
+        return self
+
    def refine_factor(self, refine_factor: int) -> AsyncVectorQuery:
        """
        A multiplier to control how many additional rows are taken during the refine
--- a/python/python/lancedb/remote/table.py
+++ b/python/python/lancedb/remote/table.py
@@ -86,6 +86,12 @@ class RemoteTable(Table):
        """to_pandas() is not yet supported on LanceDB cloud."""
        return NotImplementedError("to_pandas() is not yet supported on LanceDB cloud.")

+    def checkout(self, version):
+        """Check out `version` of the remote table.
+
+        Blocks on this table's event loop until the underlying table's
+        ``checkout`` awaitable completes, and returns its result.
+        """
+        return self._loop.run_until_complete(self._table.checkout(version))
+
+    def checkout_latest(self):
+        """Check out the latest version of the remote table.
+
+        Blocks on this table's event loop until the underlying table's
+        ``checkout_latest`` awaitable completes, and returns its result.
+        """
+        return self._loop.run_until_complete(self._table.checkout_latest())
+
    def list_indices(self):
        """List all the indices on the table"""
        return self._loop.run_until_complete(self._table.list_indices())
--- a/python/python/lancedb/table.py
+++ b/python/python/lancedb/table.py
@@ -1012,6 +1012,18 @@ class Table(ABC):
            The names of the columns to drop.
        """

+    @abstractmethod
+    def checkout(self, version: int):
+        """
+        Pin this table to a specific, previously committed version.
+
+        Parameters
+        ----------
+        version: int
+            The version number to check out.
+
+        Notes
+        -----
+        Call ``checkout_latest()`` to go back to the latest version.  In the
+        remote implementation a table pinned to a version rejects mutating
+        calls until that is done.
+        """
+
+    @abstractmethod
+    def checkout_latest(self):
+        """
+        Stop pinning the table to a fixed version and track the latest
+        version again.
+
+        This undoes a previous ``checkout()``.
+        """
+
    @cached_property
    def _dataset_uri(self) -> str:
        return _table_uri(self._conn.uri, self.name)
@@ -1959,6 +1971,7 @@ class LanceTable(Table):
                "metric": query.metric,
                "nprobes": query.nprobes,
                "refine_factor": query.refine_factor,
+                "ef": query.ef,
            }
        return ds.scanner(
            columns=query.columns,
@@ -2697,7 +2710,7 @@ class AsyncTable:

    def vector_search(
        self,
-        query_vector: Optional[Union[VEC, Tuple]] = None,
+        query_vector: Union[VEC, Tuple],
    ) -> AsyncVectorQuery:
        """
        Search the table with a given query vector.
@@ -2736,6 +2749,8 @@ class AsyncTable:
                async_query = async_query.refine_factor(query.refine_factor)
            if query.vector_column:
                async_query = async_query.column(query.vector_column)
+            if query.ef:
+                async_query = async_query.ef(query.ef)

        if not query.prefilter:
            async_query = async_query.postfilter()
--- a/python/python/tests/test_query.py
+++ b/python/python/tests/test_query.py
@@ -1,21 +1,9 @@
-#  Copyright 2023 LanceDB Developers
-#
-#  Licensed under the Apache License, Version 2.0 (the "License");
-#  you may not use this file except in compliance with the License.
-#  You may obtain a copy of the License at
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-#  Unless required by applicable law or agreed to in writing, software
-#  distributed under the License is distributed on an "AS IS" BASIS,
-#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  See the License for the specific language governing permissions and
-#  limitations under the License.
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright The LanceDB Authors

 import unittest.mock as mock
 from datetime import timedelta
-from typing import Optional

-import lance
 import lancedb
 from lancedb.index import IvfPq
 import numpy as np
@@ -23,41 +11,15 @@ import pandas.testing as tm
 import pyarrow as pa
 import pytest
 import pytest_asyncio
-from lancedb.db import LanceDBConnection
 from lancedb.pydantic import LanceModel, Vector
 from lancedb.query import AsyncQueryBase, LanceVectorQueryBuilder, Query
 from lancedb.table import AsyncTable, LanceTable


-class MockTable:
-    def __init__(self, tmp_path):
-        self.uri = tmp_path
-        self._conn = LanceDBConnection(self.uri)
-
-    def to_lance(self):
-        return lance.dataset(self.uri)
-
-    def _execute_query(self, query, batch_size: Optional[int] = None):
-        ds = self.to_lance()
-        return ds.scanner(
-            columns=query.columns,
-            filter=query.filter,
-            prefilter=query.prefilter,
-            nearest={
-                "column": query.vector_column,
-                "q": query.vector,
-                "k": query.k,
-                "metric": query.metric,
-                "nprobes": query.nprobes,
-                "refine_factor": query.refine_factor,
-            },
-            batch_size=batch_size,
-            offset=query.offset,
-        ).to_reader()
-
-
-@pytest.fixture
-def table(tmp_path) -> MockTable:
+@pytest.fixture(scope="module")
+def table(tmpdir_factory) -> lancedb.table.Table:
+    tmp_path = str(tmpdir_factory.mktemp("data"))
+    db = lancedb.connect(tmp_path)
    df = pa.table(
        {
            "vector": pa.array(
@@ -68,8 +30,7 @@ def table(tmp_path) -> MockTable:
            "float_field": pa.array([1.0, 2.0]),
        }
    )
-    lance.write_dataset(df, tmp_path)
-    return MockTable(tmp_path)
+    return db.create_table("test", df)


@pytest_asyncio.fixture
@@ -126,6 +87,12 @@ def test_query_builder(table):
    assert all(np.array(rs[0]["vector"]) == [1, 2])


+def test_with_row_id(table: lancedb.table.Table):
+    rs = table.search().with_row_id(True).to_arrow()
+    assert "_rowid" in rs.column_names
+    assert rs["_rowid"].to_pylist() == [0, 1]
+
+
 def test_vector_query_with_no_limit(table):
    with pytest.raises(ValueError):
        LanceVectorQueryBuilder(table, [0, 0], "vector").limit(0).select(
@@ -365,6 +332,12 @@ async def test_query_to_pandas_async(table_async: AsyncTable):
    assert df.shape == (0, 4)


+@pytest.mark.asyncio
+async def test_none_query(table_async: AsyncTable):
+    with pytest.raises(ValueError):
+        await table_async.query().nearest_to(None).to_arrow()
+
+
@pytest.mark.asyncio
 async def test_fast_search_async(tmp_path):
    db = await lancedb.connect_async(tmp_path)
--- a/python/python/tests/test_remote_db.py
+++ b/python/python/tests/test_remote_db.py
@@ -185,6 +185,7 @@ def test_query_sync_minimal():
            "k": 10,
            "prefilter": False,
            "refine_factor": None,
+            "ef": None,
            "vector": [1.0, 2.0, 3.0],
            "nprobes": 20,
        }
@@ -223,6 +224,7 @@ def test_query_sync_maximal():
            "refine_factor": 10,
            "vector": [1.0, 2.0, 3.0],
            "nprobes": 5,
+            "ef": None,
            "filter": "id > 0",
            "columns": ["id", "name"],
            "vector_column": "vector2",
@@ -318,6 +320,7 @@ def test_query_sync_hybrid():
                "refine_factor": None,
                "vector": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
                "nprobes": 20,
+                "ef": None,
                "with_row_id": True,
            }
            return pa.table({"_rowid": [1, 2, 3], "_distance": [0.1, 0.2, 0.3]})
--- a/python/src/query.rs
+++ b/python/src/query.rs
@@ -195,6 +195,10 @@ impl VectorQuery {
        self.inner = self.inner.clone().nprobes(nprobe as usize);
    }

+    pub fn ef(&mut self, ef: u32) {
+        self.inner = self.inner.clone().ef(ef as usize);
+    }
+
    pub fn bypass_vector_index(&mut self) {
        self.inner = self.inner.clone().bypass_vector_index()
    }
--- a/rust/ffi/node/Cargo.toml
+++ b/rust/ffi/node/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "lancedb-node"
-version = "0.13.0-beta.2"
+version = "0.13.0"
 description = "Serverless, low-latency vector database for AI applications"
 license.workspace = true
 edition.workspace = true
--- a/rust/lancedb/Cargo.toml
+++ b/rust/lancedb/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "lancedb"
-version = "0.13.0-beta.2"
+version = "0.13.0"
 edition.workspace = true
 description = "LanceDB: A serverless, low-latency vector database for AI applications"
 license.workspace = true
@@ -48,9 +48,16 @@ async-openai = { version = "0.20.0", optional = true }
 serde_with = { version = "3.8.1" }
 aws-sdk-bedrockruntime = { version = "1.27.0", optional = true }
 # For remote feature
-reqwest = { version = "0.12.0", features = ["gzip", "json", "stream"], optional = true }
-rand = { version = "0.8.3", features = ["small_rng"], optional = true}
-http = { version = "1",  optional = true } # Matching what is in reqwest
+reqwest = { version = "0.12.0", default-features = false, features = [
+    "charset",
+    "gzip",
+    "http2",
+    "json",
+    "macos-system-configuration",
+    "stream",
+], optional = true }
+rand = { version = "0.8.3", features = ["small_rng"], optional = true }
+http = { version = "1", optional = true } # Matching what is in reqwest
 uuid = { version = "1.7.0", features = ["v4"], optional = true }
 polars-arrow = { version = ">=0.37,<0.40.0", optional = true }
 polars = { version = ">=0.37,<0.40.0", optional = true }
@@ -75,7 +82,7 @@ http-body = "1" # Matching reqwest


 [features]
-default = []
+default = ["default-tls"]
 remote = ["dep:reqwest", "dep:http", "dep:rand", "dep:uuid"]
 fp16kernels = ["lance-linalg/fp16kernels"]
 s3-test = []
@@ -90,6 +97,11 @@ sentence-transformers = [
    "dep:tokenizers"
 ]

+# TLS
+default-tls = ["reqwest?/default-tls"]
+native-tls = ["reqwest?/native-tls"]
+rustls-tls = ["reqwest?/rustls-tls"]
+
 [[example]]
 name = "openai"
 required-features = ["openai"]
--- a/rust/lancedb/src/query.rs
+++ b/rust/lancedb/src/query.rs
@@ -704,6 +704,9 @@ pub struct VectorQuery {
    // IVF PQ - ANN search.
    pub(crate) query_vector: Vec<Arc<dyn Array>>,
    pub(crate) nprobes: usize,
+    // The number of candidates to consider during an HNSW search,
+    // defaults to 1.5 * limit.
+    pub(crate) ef: Option<usize>,
    pub(crate) refine_factor: Option<u32>,
    pub(crate) distance_type: Option<DistanceType>,
    /// Default is true. Set to false to enforce a brute force search.
@@ -717,6 +720,7 @@ impl VectorQuery {
            column: None,
            query_vector: Vec::new(),
            nprobes: 20,
+            ef: None,
            refine_factor: None,
            distance_type: None,
            use_index: true,
@@ -776,6 +780,18 @@ impl VectorQuery {
        self
    }

+    /// Set the number of candidates to consider during an HNSW search
+    ///
+    /// This argument is only used when the vector column has an HNSW index.
+    /// If there is no index then this value is ignored.
+    ///
+    /// Increasing this value will increase the recall of your query but will
+    /// also increase the latency of your query.  The default value is 1.5*limit.
+    pub fn ef(mut self, ef: usize) -> Self {
+        self.ef = Some(ef);
+        self
+    }
+
    /// A multiplier to control how many additional rows are taken during the refine step
    ///
    /// This argument is only used when the vector column has an IVF PQ index.
--- a/rust/lancedb/src/remote/table.rs
+++ b/rust/lancedb/src/remote/table.rs
@@ -22,6 +22,7 @@ use lance::dataset::scanner::DatasetRecordBatchStream;
 use lance::dataset::{ColumnAlteration, NewColumnTransform};
 use lance_datafusion::exec::OneShotExec;
 use serde::{Deserialize, Serialize};
+use tokio::sync::RwLock;

 use crate::{
    connection::NoData,
@@ -43,17 +44,32 @@ pub struct RemoteTable<S: HttpSend = Sender> {
    #[allow(dead_code)]
    client: RestfulLanceDbClient<S>,
    name: String,
+
+    version: RwLock<Option<u64>>,
 }

 impl<S: HttpSend> RemoteTable<S> {
    pub fn new(client: RestfulLanceDbClient<S>, name: String) -> Self {
-        Self { client, name }
+        Self {
+            client,
+            name,
+            version: RwLock::new(None),
+        }
    }

    async fn describe(&self) -> Result<TableDescription> {
-        let request = self
+        let version = self.current_version().await;
+        self.describe_version(version).await
+    }
+
+    async fn describe_version(&self, version: Option<u64>) -> Result<TableDescription> {
+        let mut request = self
            .client
            .post(&format!("/v1/table/{}/describe/", self.name));
+
+        let body = serde_json::json!({ "version": version });
+        request = request.json(&body);
+
        let (request_id, response) = self.client.send(request, true).await?;

        let response = self.check_table_response(&request_id, response).await?;
@@ -196,6 +212,7 @@ impl<S: HttpSend> RemoteTable<S> {
        body["prefilter"] = query.base.prefilter.into();
        body["distance_type"] = serde_json::json!(query.distance_type.unwrap_or_default());
        body["nprobes"] = query.nprobes.into();
+        body["ef"] = query.ef.into();
        body["refine_factor"] = query.refine_factor.into();
        if let Some(vector_column) = query.column.as_ref() {
            body["vector_column"] = serde_json::Value::String(vector_column.clone());
@@ -250,6 +267,24 @@ impl<S: HttpSend> RemoteTable<S> {
            }
        }
    }
+
+    /// Guard used by mutating operations.
+    ///
+    /// Returns `Ok(())` when no version is checked out, and
+    /// `Error::NotSupported` when this table reference is fixed at a version.
+    async fn check_mutable(&self) -> Result<()> {
+        let read_guard = self.version.read().await;
+        match *read_guard {
+            None => Ok(()),
+            Some(version) => Err(Error::NotSupported {
+                message: format!(
+                    "Cannot mutate table reference fixed at version {}. Call checkout_latest() to get a mutable table reference.",
+                    version
+                )
+            })
+        }
+    }
+
+    /// Returns the version this table reference is currently fixed at, or
+    /// `None` when no version has been checked out.
+    async fn current_version(&self) -> Option<u64> {
+        let read_guard = self.version.read().await;
+        *read_guard
+    }
 }

 #[derive(Deserialize)]
@@ -277,7 +312,11 @@ mod test_utils {
            T: Into<reqwest::Body>,
        {
            let client = client_with_handler(handler);
-            Self { client, name }
+            Self {
+                client,
+                name,
+                version: RwLock::new(None),
+            }
        }
    }
 }
@@ -296,17 +335,30 @@ impl<S: HttpSend> TableInternal for RemoteTable<S> {
    async fn version(&self) -> Result<u64> {
        self.describe().await.map(|desc| desc.version)
    }
-    async fn checkout(&self, _version: u64) -> Result<()> {
-        Err(Error::NotSupported {
-            message: "checkout is not supported on LanceDB cloud.".into(),
-        })
+    async fn checkout(&self, version: u64) -> Result<()> {
+        // check that the version exists
+        self.describe_version(Some(version))
+            .await
+            .map_err(|e| match e {
+                // try to map the error to a more user-friendly error telling them
+                // specifically that the version does not exist
+                Error::TableNotFound { name } => Error::TableNotFound {
+                    name: format!("{} (version: {})", name, version),
+                },
+                e => e,
+            })?;
+
+        let mut write_guard = self.version.write().await;
+        *write_guard = Some(version);
+        Ok(())
    }
    async fn checkout_latest(&self) -> Result<()> {
-        Err(Error::NotSupported {
-            message: "checkout is not supported on LanceDB cloud.".into(),
-        })
+        let mut write_guard = self.version.write().await;
+        *write_guard = None;
+        Ok(())
    }
    async fn restore(&self) -> Result<()> {
+        self.check_mutable().await?;
        Err(Error::NotSupported {
            message: "restore is not supported on LanceDB cloud.".into(),
        })
@@ -320,10 +372,13 @@ impl<S: HttpSend> TableInternal for RemoteTable<S> {
            .client
            .post(&format!("/v1/table/{}/count_rows/", self.name));

+        let version = self.current_version().await;
+
        if let Some(filter) = filter {
-            request = request.json(&serde_json::json!({ "predicate": filter }));
+            request = request.json(&serde_json::json!({ "predicate": filter, "version": version }));
        } else {
-            request = request.json(&serde_json::json!({}));
+            let body = serde_json::json!({ "version": version });
+            request = request.json(&body);
        }

        let (request_id, response) = self.client.send(request, true).await?;
@@ -343,6 +398,7 @@ impl<S: HttpSend> TableInternal for RemoteTable<S> {
        add: AddDataBuilder<NoData>,
        data: Box<dyn RecordBatchReader + Send>,
    ) -> Result<()> {
+        self.check_mutable().await?;
        let body = Self::reader_as_body(data)?;
        let mut request = self
            .client
@@ -371,7 +427,8 @@ impl<S: HttpSend> TableInternal for RemoteTable<S> {
    ) -> Result<Arc<dyn ExecutionPlan>> {
        let request = self.client.post(&format!("/v1/table/{}/query/", self.name));

-        let body = serde_json::Value::Object(Default::default());
+        let version = self.current_version().await;
+        let body = serde_json::json!({ "version": version });
        let bodies = Self::apply_vector_query_params(body, query)?;

        let mut futures = Vec::with_capacity(bodies.len());
@@ -406,7 +463,8 @@ impl<S: HttpSend> TableInternal for RemoteTable<S> {
            .post(&format!("/v1/table/{}/query/", self.name))
            .header(CONTENT_TYPE, JSON_CONTENT_TYPE);

-        let mut body = serde_json::Value::Object(Default::default());
+        let version = self.current_version().await;
+        let mut body = serde_json::json!({ "version": version });
        Self::apply_query_params(&mut body, query)?;
        // Empty vector can be passed if no vector search is performed.
        body["vector"] = serde_json::Value::Array(Vec::new());
@@ -420,6 +478,7 @@ impl<S: HttpSend> TableInternal for RemoteTable<S> {
        Ok(DatasetRecordBatchStream::new(stream))
    }
    async fn update(&self, update: UpdateBuilder) -> Result<u64> {
+        self.check_mutable().await?;
        let request = self
            .client
            .post(&format!("/v1/table/{}/update/", self.name));
@@ -441,6 +500,7 @@ impl<S: HttpSend> TableInternal for RemoteTable<S> {
        Ok(0) // TODO: support returning number of modified rows once supported in SaaS.
    }
    async fn delete(&self, predicate: &str) -> Result<()> {
+        self.check_mutable().await?;
        let body = serde_json::json!({ "predicate": predicate });
        let request = self
            .client
@@ -452,6 +512,7 @@ impl<S: HttpSend> TableInternal for RemoteTable<S> {
    }

    async fn create_index(&self, mut index: IndexBuilder) -> Result<()> {
+        self.check_mutable().await?;
        let request = self
            .client
            .post(&format!("/v1/table/{}/create_index/", self.name));
@@ -530,6 +591,7 @@ impl<S: HttpSend> TableInternal for RemoteTable<S> {
        params: MergeInsertBuilder,
        new_data: Box<dyn RecordBatchReader + Send>,
    ) -> Result<()> {
+        self.check_mutable().await?;
        let query = MergeInsertRequest::try_from(params)?;
        let body = Self::reader_as_body(new_data)?;
        let request = self
@@ -546,6 +608,7 @@ impl<S: HttpSend> TableInternal for RemoteTable<S> {
        Ok(())
    }
    async fn optimize(&self, _action: OptimizeAction) -> Result<OptimizeStats> {
+        self.check_mutable().await?;
        Err(Error::NotSupported {
            message: "optimize is not supported on LanceDB cloud.".into(),
        })
@@ -555,16 +618,19 @@ impl<S: HttpSend> TableInternal for RemoteTable<S> {
        _transforms: NewColumnTransform,
        _read_columns: Option<Vec<String>>,
    ) -> Result<()> {
+        self.check_mutable().await?;
        Err(Error::NotSupported {
            message: "add_columns is not yet supported.".into(),
        })
    }
    async fn alter_columns(&self, _alterations: &[ColumnAlteration]) -> Result<()> {
+        self.check_mutable().await?;
        Err(Error::NotSupported {
            message: "alter_columns is not yet supported.".into(),
        })
    }
    async fn drop_columns(&self, _columns: &[&str]) -> Result<()> {
+        self.check_mutable().await?;
        Err(Error::NotSupported {
            message: "drop_columns is not yet supported.".into(),
        })
@@ -572,9 +638,13 @@ impl<S: HttpSend> TableInternal for RemoteTable<S> {

    async fn list_indices(&self) -> Result<Vec<IndexConfig>> {
        // Make request to list the indices
-        let request = self
+        let mut request = self
            .client
            .post(&format!("/v1/table/{}/index/list/", self.name));
+        let version = self.current_version().await;
+        let body = serde_json::json!({ "version": version });
+        request = request.json(&body);
+
        let (request_id, response) = self.client.send(request, true).await?;
        let response = self.check_table_response(&request_id, response).await?;

@@ -624,10 +694,14 @@ impl<S: HttpSend> TableInternal for RemoteTable<S> {
    }

    async fn index_stats(&self, index_name: &str) -> Result<Option<IndexStatistics>> {
-        let request = self.client.post(&format!(
+        let mut request = self.client.post(&format!(
            "/v1/table/{}/index/{}/stats/",
            self.name, index_name
        ));
+        let version = self.current_version().await;
+        let body = serde_json::json!({ "version": version });
+        request = request.json(&body);
+
        let (request_id, response) = self.client.send(request, true).await?;

        if response.status() == StatusCode::NOT_FOUND {
@@ -805,7 +879,10 @@ mod tests {
                request.headers().get("Content-Type").unwrap(),
                JSON_CONTENT_TYPE
            );
-            assert_eq!(request.body().unwrap().as_bytes().unwrap(), br#"{}"#);
+            assert_eq!(
+                request.body().unwrap().as_bytes().unwrap(),
+                br#"{"version":null}"#
+            );

            http::Response::builder().status(200).body("42").unwrap()
        });
@@ -822,7 +899,7 @@ mod tests {
            );
            assert_eq!(
                request.body().unwrap().as_bytes().unwrap(),
-                br#"{"predicate":"a > 10"}"#
+                br#"{"predicate":"a > 10","version":null}"#
            );

            http::Response::builder().status(200).body("42").unwrap()
@@ -1121,7 +1198,9 @@ mod tests {
                "prefilter": true,
                "distance_type": "l2",
                "nprobes": 20,
+                "ef": Option::<usize>::None,
                "refine_factor": null,
+                "version": null,
            });
            // Pass vector separately to make sure it matches f32 precision.
            expected_body["vector"] = vec![0.1f32, 0.2, 0.3].into();
@@ -1166,7 +1245,9 @@ mod tests {
                "bypass_vector_index": true,
                "columns": ["a", "b"],
                "nprobes": 12,
+                "ef": Option::<usize>::None,
                "refine_factor": 2,
+                "version": null,
            });
            // Pass vector separately to make sure it matches f32 precision.
            expected_body["vector"] = vec![0.1f32, 0.2, 0.3].into();
@@ -1222,6 +1303,7 @@ mod tests {
                "k": 10,
                "vector": [],
                "with_row_id": true,
+                "version": null
            });
            assert_eq!(body, expected_body);

@@ -1451,4 +1533,195 @@ mod tests {
        let indices = table.index_stats("my_index").await.unwrap();
        assert!(indices.is_none());
    }
+
+    #[tokio::test]
+    async fn test_passes_version() {
+        let table = Table::new_with_handler("my_table", |request| {
+            let body = request.body().unwrap().as_bytes().unwrap();
+            let body: serde_json::Value = serde_json::from_slice(body).unwrap();
+            let version = body
+                .as_object()
+                .unwrap()
+                .get("version")
+                .unwrap()
+                .as_u64()
+                .unwrap();
+            assert_eq!(version, 42);
+
+            let response_body = match request.url().path() {
+                "/v1/table/my_table/describe/" => {
+                    serde_json::json!({
+                        "version": 42,
+                        "schema": { "fields": [] }
+                    })
+                }
+                "/v1/table/my_table/index/list/" => {
+                    serde_json::json!({
+                        "indexes": []
+                    })
+                }
+                "/v1/table/my_table/index/my_idx/stats/" => {
+                    serde_json::json!({
+                        "num_indexed_rows": 100000,
+                        "num_unindexed_rows": 0,
+                        "index_type": "IVF_PQ",
+                        "distance_type": "l2"
+                    })
+                }
+                "/v1/table/my_table/count_rows/" => {
+                    serde_json::json!(1000)
+                }
+                "/v1/table/my_table/query/" => {
+                    let expected_data = RecordBatch::try_new(
+                        Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, false)])),
+                        vec![Arc::new(Int32Array::from(vec![1, 2, 3]))],
+                    )
+                    .unwrap();
+                    let expected_data_ref = expected_data.clone();
+                    let response_body = write_ipc_file(&expected_data_ref);
+                    return http::Response::builder()
+                        .status(200)
+                        .header(CONTENT_TYPE, ARROW_FILE_CONTENT_TYPE)
+                        .body(response_body)
+                        .unwrap();
+                }
+
+                path => panic!("Unexpected path: {}", path),
+            };
+
+            http::Response::builder()
+                .status(200)
+                .body(
+                    serde_json::to_string(&response_body)
+                        .unwrap()
+                        .as_bytes()
+                        .to_vec(),
+                )
+                .unwrap()
+        });
+
+        table.checkout(42).await.unwrap();
+
+        // ensure that version is passed to the /describe endpoint
+        let version = table.version().await.unwrap();
+        assert_eq!(version, 42);
+
+        // ensure it's passed to other read API calls
+        table.list_indices().await.unwrap();
+        table.index_stats("my_idx").await.unwrap();
+        table.count_rows(None).await.unwrap();
+        table
+            .query()
+            .nearest_to(vec![0.1, 0.2, 0.3])
+            .unwrap()
+            .execute()
+            .await
+            .unwrap();
+    }
+
+    #[tokio::test]
+    async fn test_fails_if_checkout_version_doesnt_exist() {
+        // Mock handler: only version 42 is known; any other requested version
+        // is answered with a 404.
+        let table = Table::new_with_handler("my_table", |request| {
+            let body = request.body().unwrap().as_bytes().unwrap();
+            let body: serde_json::Value = serde_json::from_slice(body).unwrap();
+            let version = body
+                .as_object()
+                .unwrap()
+                .get("version")
+                .unwrap()
+                .as_u64()
+                .unwrap();
+            if version != 42 {
+                return http::Response::builder()
+                    .status(404)
+                    .body(format!("Table my_table (version: {}) not found", version))
+                    .unwrap();
+            }
+
+            let response_body = match request.url().path() {
+                "/v1/table/my_table/describe/" => {
+                    serde_json::json!({
+                        "version": 42,
+                        "schema": { "fields": [] }
+                    })
+                }
+                _ => panic!("Unexpected path"),
+            };
+
+            http::Response::builder()
+                .status(200)
+                .body(serde_json::to_string(&response_body).unwrap())
+                .unwrap()
+        });
+
+        // Checking out an unknown version must fail with TableNotFound, with
+        // the requested version appended to the table name.
+        let res = table.checkout(43).await;
+        assert!(
+            matches!(res, Err(Error::TableNotFound { name }) if name == "my_table (version: 43)")
+        );
+    }
+
+    #[tokio::test]
+    async fn test_timetravel_immutable() {
+        let table = Table::new_with_handler::<String>("my_table", |request| {
+            let response_body = match request.url().path() {
+                "/v1/table/my_table/describe/" => {
+                    serde_json::json!({
+                        "version": 42,
+                        "schema": { "fields": [] }
+                    })
+                }
+                _ => panic!("Should not have made a request: {:?}", request),
+            };
+
+            http::Response::builder()
+                .status(200)
+                .body(serde_json::to_string(&response_body).unwrap())
+                .unwrap()
+        });
+
+        table.checkout(42).await.unwrap();
+
+        // Ensure that all mutable operations fail.
+        let res = table
+            .update()
+            .column("a", "a + 1")
+            .column("b", "b - 1")
+            .only_if("b > 10")
+            .execute()
+            .await;
+        assert!(matches!(res, Err(Error::NotSupported { .. })));
+
+        let batch = RecordBatch::try_new(
+            Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, false)])),
+            vec![Arc::new(Int32Array::from(vec![1, 2, 3]))],
+        )
+        .unwrap();
+        let data = Box::new(RecordBatchIterator::new(
+            [Ok(batch.clone())],
+            batch.schema(),
+        ));
+        let res = table.merge_insert(&["some_col"]).execute(data).await;
+        assert!(matches!(res, Err(Error::NotSupported { .. })));
+
+        let res = table.delete("id in (1, 2, 3)").await;
+        assert!(matches!(res, Err(Error::NotSupported { .. })));
+
+        let data = RecordBatch::try_new(
+            Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, false)])),
+            vec![Arc::new(Int32Array::from(vec![1, 2, 3]))],
+        )
+        .unwrap();
+        let res = table
+            .add(RecordBatchIterator::new([Ok(data.clone())], data.schema()))
+            .execute()
+            .await;
+        assert!(matches!(res, Err(Error::NotSupported { .. })));
+
+        let res = table
+            .create_index(&["a"], Index::IvfPq(Default::default()))
+            .execute()
+            .await;
+        assert!(matches!(res, Err(Error::NotSupported { .. })));
+    }
 }
--- a/rust/lancedb/src/table.rs
+++ b/rust/lancedb/src/table.rs
@@ -1904,6 +1904,9 @@ impl TableInternal for NativeTable {
            query.base.offset.map(|offset| offset as i64),
        )?;
        scanner.nprobs(query.nprobes);
+        if let Some(ef) = query.ef {
+            scanner.ef(ef);
+        }
        scanner.use_index(query.use_index);
        scanner.prefilter(query.base.prefilter);
        match query.base.select {
Author	SHA1	Message	Date
albertlockett	b7fed59278	linter and clippy	2024-11-21 07:11:35 -05:00
albertlockett	60ad82b6ad	add tests for rust	2024-11-21 06:58:51 -05:00
albertlockett	134258308c	it passes version for all read calls	2024-11-20 11:46:04 -05:00
albertlockett	d36334d565	fixed for describe	2024-11-20 10:14:39 -05:00
albertlockett	131c01d702	feat: support for checkout and checkout_latest in remote rust and python sdks	2024-11-19 17:24:28 -05:00
BubbleCal	b2f88f0b29	feat: support to specify ef search param (#1844 ) Signed-off-by: BubbleCal <bubble-cal@outlook.com>	2024-11-19 23:12:25 +08:00
fzowl	f2e3989831	docs: voyageai embedding in the index (#1813 ) The code to support VoyageAI embedding and rerank models was added in the https://github.com/lancedb/lancedb/pull/1799 PR. Some of the documentation changes was also made, here adding the VoyageAI embedding doc link to the index page. These are my first PRs in lancedb and while i checked the documentation/code structure, i might missed something important. Please let me know if any changes required!	2024-11-18 14:34:16 -08:00
Emmanuel Ferdman	83ae52938a	docs: update migration reference (#1837 ) # PR Summary PR fixes the `migration.md` reference in `docs/src/guides/tables.md`. On the way, it also fixes some typos found in that document. Signed-off-by: Emmanuel Ferdman <emmanuelferdman@gmail.com>	2024-11-18 14:33:32 -08:00
Lei Xu	267aa83bf8	feat(python): check vector query is not None (#1847 ) Fix the type hints of `nearest_to` method, and raise `ValueError` when the input is None	2024-11-18 14:15:22 -08:00
Will Jones	cc72050206	chore: update package locks (#1845 ) Also ran `npm audit`.	2024-11-18 13:44:06 -08:00
Will Jones	72543c8b9d	test(python): test `with_row_id` in sync query (#1835 ) Also remove weird `MockTable` fixture.	2024-11-18 11:32:52 -08:00
Will Jones	97d6210c33	ci: remove invalid references (#1834 ) Fix release job	2024-11-18 11:32:44 -08:00
Ho Kim	a3d0c27b0a	feat: add support for rustls (#1842 ) Hello, this is a simple PR that supports `rustls-tls` feature. The `reqwest`\`s default TLS `default-tls` is enabled by default, to dismiss the side-effect. The user can use `rustls-tls` like this: ```toml lancedb = { version = "*", default-features = false, features = ["rustls-tls"] } ```	2024-11-18 10:36:20 -08:00
BubbleCal	b23d8abcdd	docs: introduce incremental indexing for FTS (#1789 ) don't merge it before https://github.com/lancedb/lancedb/pull/1769 merged --------- Signed-off-by: BubbleCal <bubble-cal@outlook.com>	2024-11-18 20:21:28 +08:00
Rob Meng	e3ea5cf9b9	chore: bump lance to 0.19.3 (#1839 )	2024-11-16 14:57:52 -05:00
Lance Release	4f8b086175	Updating package-lock.json	2024-11-15 20:18:16 +00:00
Lance Release	72330fb759	Bump version: 0.13.0-beta.3 → 0.13.0	2024-11-15 20:17:59 +00:00
Lance Release	e3b2c5f438	Bump version: 0.13.0-beta.2 → 0.13.0-beta.3	2024-11-15 20:17:55 +00:00