Format python

Signed-off-by: Xuanwo <github@xuanwo.io>
docs: Add examples for where in when_matched_update_all
2025-12-27 15:12:53 +00:00 · 2025-07-10 19:11:02 +08:00 · 2025-07-10 19:08:45 +08:00
47 changed files with 272 additions and 681 deletions
--- a/.bumpversion.toml
+++ b/.bumpversion.toml
@@ -1,5 +1,5 @@
 [tool.bumpversion]
-current_version = "0.21.1"
+current_version = "0.21.1-beta.1"
 parse = """(?x)
    (?P<major>0|[1-9]\\d*)\\.
    (?P<minor>0|[1-9]\\d*)\\.
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1209,31 +1209,6 @@ dependencies = [
 "generic-array",
 ]

-[[package]]
-name = "bon"
-version = "3.6.4"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f61138465baf186c63e8d9b6b613b508cd832cba4ce93cf37ce5f096f91ac1a6"
-dependencies = [
- "bon-macros",
- "rustversion",
-]
-
-[[package]]
-name = "bon-macros"
-version = "3.6.4"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "40d1dad34aa19bf02295382f08d9bc40651585bd497266831d40ee6296fb49ca"
-dependencies = [
- "darling",
- "ident_case",
- "prettyplease",
- "proc-macro2",
- "quote",
- "rustversion",
- "syn 2.0.103",
-]
-
 [[package]]
 name = "brotli"
 version = "3.5.0"
@@ -2515,9 +2490,9 @@ dependencies = [

 [[package]]
 name = "downcast-rs"
-version = "2.0.1"
+version = "1.2.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ea8a8b81cacc08888170eef4d13b775126db426d0b348bee9d18c2c1eaf123cf"
+checksum = "75b325c5dbd37f80359721ad39aca5a29fb04c89279657cffdda8736d0c0b9d2"

 [[package]]
 name = "dunce"
@@ -2840,8 +2815,8 @@ checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c"

 [[package]]
 name = "fsst"
-version = "0.31.2"
-source = "git+https://github.com/lancedb/lance.git?tag=v0.31.2-beta.3#6e987921d0efbe42c018047ea45d1b6f624d8280"
+version = "0.31.1"
+source = "git+https://github.com/lancedb/lance.git?tag=v0.31.1-beta.2#dff098a5aa66866197cfcd7ae7ca004aed02928f"
 dependencies = [
 "rand 0.8.5",
 ]
@@ -3790,6 +3765,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e0242819d153cba4b4b05a5a8f2a7e9bbf97b6055b2a002b395c96b5ff3c0222"
 dependencies = [
 "cfg-if",
+ "js-sys",
+ "wasm-bindgen",
+ "web-sys",
 ]

 [[package]]
@@ -3930,8 +3908,8 @@ dependencies = [

 [[package]]
 name = "lance"
-version = "0.31.2"
-source = "git+https://github.com/lancedb/lance.git?tag=v0.31.2-beta.3#6e987921d0efbe42c018047ea45d1b6f624d8280"
+version = "0.31.1"
+source = "git+https://github.com/lancedb/lance.git?tag=v0.31.1-beta.2#dff098a5aa66866197cfcd7ae7ca004aed02928f"
 dependencies = [
 "arrow",
 "arrow-arith",
@@ -3993,8 +3971,8 @@ dependencies = [

 [[package]]
 name = "lance-arrow"
-version = "0.31.2"
-source = "git+https://github.com/lancedb/lance.git?tag=v0.31.2-beta.3#6e987921d0efbe42c018047ea45d1b6f624d8280"
+version = "0.31.1"
+source = "git+https://github.com/lancedb/lance.git?tag=v0.31.1-beta.2#dff098a5aa66866197cfcd7ae7ca004aed02928f"
 dependencies = [
 "arrow-array",
 "arrow-buffer",
@@ -4011,8 +3989,8 @@ dependencies = [

 [[package]]
 name = "lance-core"
-version = "0.31.2"
-source = "git+https://github.com/lancedb/lance.git?tag=v0.31.2-beta.3#6e987921d0efbe42c018047ea45d1b6f624d8280"
+version = "0.31.1"
+source = "git+https://github.com/lancedb/lance.git?tag=v0.31.1-beta.2#dff098a5aa66866197cfcd7ae7ca004aed02928f"
 dependencies = [
 "arrow-array",
 "arrow-buffer",
@@ -4047,8 +4025,8 @@ dependencies = [

 [[package]]
 name = "lance-datafusion"
-version = "0.31.2"
-source = "git+https://github.com/lancedb/lance.git?tag=v0.31.2-beta.3#6e987921d0efbe42c018047ea45d1b6f624d8280"
+version = "0.31.1"
+source = "git+https://github.com/lancedb/lance.git?tag=v0.31.1-beta.2#dff098a5aa66866197cfcd7ae7ca004aed02928f"
 dependencies = [
 "arrow",
 "arrow-array",
@@ -4076,8 +4054,8 @@ dependencies = [

 [[package]]
 name = "lance-datagen"
-version = "0.31.2"
-source = "git+https://github.com/lancedb/lance.git?tag=v0.31.2-beta.3#6e987921d0efbe42c018047ea45d1b6f624d8280"
+version = "0.31.1"
+source = "git+https://github.com/lancedb/lance.git?tag=v0.31.1-beta.2#dff098a5aa66866197cfcd7ae7ca004aed02928f"
 dependencies = [
 "arrow",
 "arrow-array",
@@ -4093,8 +4071,8 @@ dependencies = [

 [[package]]
 name = "lance-encoding"
-version = "0.31.2"
-source = "git+https://github.com/lancedb/lance.git?tag=v0.31.2-beta.3#6e987921d0efbe42c018047ea45d1b6f624d8280"
+version = "0.31.1"
+source = "git+https://github.com/lancedb/lance.git?tag=v0.31.1-beta.2#dff098a5aa66866197cfcd7ae7ca004aed02928f"
 dependencies = [
 "arrayref",
 "arrow",
@@ -4133,8 +4111,8 @@ dependencies = [

 [[package]]
 name = "lance-file"
-version = "0.31.2"
-source = "git+https://github.com/lancedb/lance.git?tag=v0.31.2-beta.3#6e987921d0efbe42c018047ea45d1b6f624d8280"
+version = "0.31.1"
+source = "git+https://github.com/lancedb/lance.git?tag=v0.31.1-beta.2#dff098a5aa66866197cfcd7ae7ca004aed02928f"
 dependencies = [
 "arrow-arith",
 "arrow-array",
@@ -4168,8 +4146,8 @@ dependencies = [

 [[package]]
 name = "lance-index"
-version = "0.31.2"
-source = "git+https://github.com/lancedb/lance.git?tag=v0.31.2-beta.3#6e987921d0efbe42c018047ea45d1b6f624d8280"
+version = "0.31.1"
+source = "git+https://github.com/lancedb/lance.git?tag=v0.31.1-beta.2#dff098a5aa66866197cfcd7ae7ca004aed02928f"
 dependencies = [
 "arrow",
 "arrow-array",
@@ -4223,8 +4201,8 @@ dependencies = [

 [[package]]
 name = "lance-io"
-version = "0.31.2"
-source = "git+https://github.com/lancedb/lance.git?tag=v0.31.2-beta.3#6e987921d0efbe42c018047ea45d1b6f624d8280"
+version = "0.31.1"
+source = "git+https://github.com/lancedb/lance.git?tag=v0.31.1-beta.2#dff098a5aa66866197cfcd7ae7ca004aed02928f"
 dependencies = [
 "arrow",
 "arrow-arith",
@@ -4262,11 +4240,10 @@ dependencies = [

 [[package]]
 name = "lance-linalg"
-version = "0.31.2"
-source = "git+https://github.com/lancedb/lance.git?tag=v0.31.2-beta.3#6e987921d0efbe42c018047ea45d1b6f624d8280"
+version = "0.31.1"
+source = "git+https://github.com/lancedb/lance.git?tag=v0.31.1-beta.2#dff098a5aa66866197cfcd7ae7ca004aed02928f"
 dependencies = [
 "arrow-array",
- "arrow-buffer",
 "arrow-ord",
 "arrow-schema",
 "bitvec",
@@ -4286,8 +4263,8 @@ dependencies = [

 [[package]]
 name = "lance-table"
-version = "0.31.2"
-source = "git+https://github.com/lancedb/lance.git?tag=v0.31.2-beta.3#6e987921d0efbe42c018047ea45d1b6f624d8280"
+version = "0.31.1"
+source = "git+https://github.com/lancedb/lance.git?tag=v0.31.1-beta.2#dff098a5aa66866197cfcd7ae7ca004aed02928f"
 dependencies = [
 "arrow",
 "arrow-array",
@@ -4325,8 +4302,8 @@ dependencies = [

 [[package]]
 name = "lance-testing"
-version = "0.31.2"
-source = "git+https://github.com/lancedb/lance.git?tag=v0.31.2-beta.3#6e987921d0efbe42c018047ea45d1b6f624d8280"
+version = "0.31.1"
+source = "git+https://github.com/lancedb/lance.git?tag=v0.31.1-beta.2#dff098a5aa66866197cfcd7ae7ca004aed02928f"
 dependencies = [
 "arrow-array",
 "arrow-schema",
@@ -4337,7 +4314,7 @@ dependencies = [

 [[package]]
 name = "lancedb"
-version = "0.21.1"
+version = "0.21.1-beta.1"
 dependencies = [
 "arrow",
 "arrow-array",
@@ -4424,7 +4401,7 @@ dependencies = [

 [[package]]
 name = "lancedb-node"
-version = "0.21.1"
+version = "0.21.1-beta.1"
 dependencies = [
 "arrow-array",
 "arrow-ipc",
@@ -4449,7 +4426,7 @@ dependencies = [

 [[package]]
 name = "lancedb-nodejs"
-version = "0.21.1"
+version = "0.21.1-beta.1"
 dependencies = [
 "arrow-array",
 "arrow-ipc",
@@ -4469,7 +4446,7 @@ dependencies = [

 [[package]]
 name = "lancedb-python"
-version = "0.24.1"
+version = "0.24.1-beta.1"
 dependencies = [
 "arrow",
 "env_logger",
@@ -4744,10 +4721,11 @@ dependencies = [

 [[package]]
 name = "measure_time"
-version = "0.9.0"
+version = "0.8.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "51c55d61e72fc3ab704396c5fa16f4c184db37978ae4e94ca8959693a235fc0e"
+checksum = "dbefd235b0aadd181626f281e1d684e116972988c14c264e42069d5e8a5775cc"
 dependencies = [
+ "instant",
 "log",
 ]

@@ -5281,9 +5259,9 @@ checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39"

 [[package]]
 name = "ownedbytes"
-version = "0.9.0"
+version = "0.7.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2fbd56f7631767e61784dc43f8580f403f4475bd4aaa4da003e6295e1bab4a7e"
+checksum = "c3a059efb063b8f425b948e042e6b9bd85edfe60e913630ed727b23e2dfcc558"
 dependencies = [
 "stable_deref_trait",
 ]
@@ -7077,9 +7055,9 @@ checksum = "56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d"

 [[package]]
 name = "sketches-ddsketch"
-version = "0.3.0"
+version = "0.2.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c1e9a774a6c28142ac54bb25d25562e6bcf957493a184f15ad4eebccb23e410a"
+checksum = "85636c14b73d81f541e525f585c0a2109e6744e1565b5c1668e31c70c10ed65c"
 dependencies = [
 "serde",
 ]
@@ -7409,15 +7387,14 @@ checksum = "7b2093cf4c8eb1e67749a6762251bc9cd836b6fc171623bd0a9d324d37af2417"

 [[package]]
 name = "tantivy"
-version = "0.24.1"
+version = "0.22.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ca2374a21157427c5faff2d90930f035b6c22a5d7b0e5b0b7f522e988ef33c06"
+checksum = "f8d0582f186c0a6d55655d24543f15e43607299425c5ad8352c242b914b31856"
 dependencies = [
 "aho-corasick",
 "arc-swap",
 "base64 0.22.1",
 "bitpacking",
- "bon",
 "byteorder",
 "census",
 "crc32fast",
@@ -7427,20 +7404,20 @@ dependencies = [
 "fnv",
 "fs4",
 "htmlescape",
- "hyperloglogplus",
- "itertools 0.14.0",
+ "itertools 0.12.1",
 "levenshtein_automata",
 "log",
 "lru",
 "lz4_flex",
 "measure_time",
 "memmap2 0.9.5",
+ "num_cpus",
 "once_cell",
 "oneshot",
 "rayon",
 "regex",
 "rust-stemmers",
- "rustc-hash 2.1.1",
+ "rustc-hash 1.1.0",
 "serde",
 "serde_json",
 "sketches-ddsketch",
@@ -7453,7 +7430,7 @@ dependencies = [
 "tantivy-stacker",
 "tantivy-tokenizer-api",
 "tempfile",
- "thiserror 2.0.12",
+ "thiserror 1.0.69",
 "time",
 "uuid",
 "winapi",
@@ -7461,22 +7438,22 @@ dependencies = [

 [[package]]
 name = "tantivy-bitpacker"
-version = "0.8.0"
+version = "0.6.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1adc286a39e089ae9938935cd488d7d34f14502544a36607effd2239ff0e2494"
+checksum = "284899c2325d6832203ac6ff5891b297fc5239c3dc754c5bc1977855b23c10df"
 dependencies = [
 "bitpacking",
 ]

 [[package]]
 name = "tantivy-columnar"
-version = "0.5.0"
+version = "0.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6300428e0c104c4f7db6f95b466a6f5c1b9aece094ec57cdd365337908dc7344"
+checksum = "12722224ffbe346c7fec3275c699e508fd0d4710e629e933d5736ec524a1f44e"
 dependencies = [
 "downcast-rs",
 "fastdivide",
- "itertools 0.14.0",
+ "itertools 0.12.1",
 "serde",
 "tantivy-bitpacker",
 "tantivy-common",
@@ -7486,9 +7463,9 @@ dependencies = [

 [[package]]
 name = "tantivy-common"
-version = "0.9.0"
+version = "0.7.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e91b6ea6090ce03dc72c27d0619e77185d26cc3b20775966c346c6d4f7e99d7f"
+checksum = "8019e3cabcfd20a1380b491e13ff42f57bb38bf97c3d5fa5c07e50816e0621f4"
 dependencies = [
 "async-trait",
 "byteorder",
@@ -7510,23 +7487,19 @@ dependencies = [

 [[package]]
 name = "tantivy-query-grammar"
-version = "0.24.0"
+version = "0.22.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e810cdeeebca57fc3f7bfec5f85fdbea9031b2ac9b990eb5ff49b371d52bbe6a"
+checksum = "847434d4af57b32e309f4ab1b4f1707a6c566656264caa427ff4285c4d9d0b82"
 dependencies = [
 "nom",
- "serde",
- "serde_json",
 ]

 [[package]]
 name = "tantivy-sstable"
-version = "0.5.0"
+version = "0.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "709f22c08a4c90e1b36711c1c6cad5ae21b20b093e535b69b18783dd2cb99416"
+checksum = "c69578242e8e9fc989119f522ba5b49a38ac20f576fc778035b96cc94f41f98e"
 dependencies = [
- "futures-util",
- "itertools 0.14.0",
 "tantivy-bitpacker",
 "tantivy-common",
 "tantivy-fst",
@@ -7535,9 +7508,9 @@ dependencies = [

 [[package]]
 name = "tantivy-stacker"
-version = "0.5.0"
+version = "0.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2bcdebb267671311d1e8891fd9d1301803fdb8ad21ba22e0a30d0cab49ba59c1"
+checksum = "c56d6ff5591fc332739b3ce7035b57995a3ce29a93ffd6012660e0949c956ea8"
 dependencies = [
 "murmurhash32",
 "rand_distr 0.4.3",
@@ -7546,9 +7519,9 @@ dependencies = [

 [[package]]
 name = "tantivy-tokenizer-api"
-version = "0.5.0"
+version = "0.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "dfa942fcee81e213e09715bbce8734ae2180070b97b33839a795ba1de201547d"
+checksum = "2a0dcade25819a89cfe6f17d932c9cedff11989936bf6dd4f336d50392053b04"
 dependencies = [
 "serde",
 ]
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -21,16 +21,14 @@ categories = ["database-implementations"]
 rust-version = "1.78.0"

 [workspace.dependencies]
-lance = { "version" = "=0.31.2", "features" = [
-    "dynamodb",
-], "tag" = "v0.31.2-beta.3", "git" = "https://github.com/lancedb/lance.git" }
-lance-io = { "version" = "=0.31.2", "tag" = "v0.31.2-beta.3", "git" = "https://github.com/lancedb/lance.git" }
-lance-index = { "version" = "=0.31.2", "tag" = "v0.31.2-beta.3", "git" = "https://github.com/lancedb/lance.git" }
-lance-linalg = { "version" = "=0.31.2", "tag" = "v0.31.2-beta.3", "git" = "https://github.com/lancedb/lance.git" }
-lance-table = { "version" = "=0.31.2", "tag" = "v0.31.2-beta.3", "git" = "https://github.com/lancedb/lance.git" }
-lance-testing = { "version" = "=0.31.2", "tag" = "v0.31.2-beta.3", "git" = "https://github.com/lancedb/lance.git" }
-lance-datafusion = { "version" = "=0.31.2", "tag" = "v0.31.2-beta.3", "git" = "https://github.com/lancedb/lance.git" }
-lance-encoding = { "version" = "=0.31.2", "tag" = "v0.31.2-beta.3", "git" = "https://github.com/lancedb/lance.git" }
+lance = { "version" = "=0.31.1", tag="v0.31.1-beta.2", git="https://github.com/lancedb/lance.git", features = ["dynamodb"] }
+lance-io = { "version" = "=0.31.1", tag="v0.31.1-beta.2", git="https://github.com/lancedb/lance.git" }
+lance-index = { "version" = "=0.31.1", tag="v0.31.1-beta.2", git="https://github.com/lancedb/lance.git" }
+lance-linalg = { "version" = "=0.31.1", tag="v0.31.1-beta.2", git="https://github.com/lancedb/lance.git" }
+lance-table = { "version" = "=0.31.1", tag="v0.31.1-beta.2", git="https://github.com/lancedb/lance.git" }
+lance-testing = { "version" = "=0.31.1", tag="v0.31.1-beta.2", git="https://github.com/lancedb/lance.git" }
+lance-datafusion = { "version" = "=0.31.1", tag="v0.31.1-beta.2", git="https://github.com/lancedb/lance.git" }
+lance-encoding = { "version" = "=0.31.1", tag="v0.31.1-beta.2", git="https://github.com/lancedb/lance.git" }
 # Note that this one does not include pyarrow
 arrow = { version = "55.1", optional = false }
 arrow-array = "55.1"
--- a/ci/set_lance_version.py
+++ b/ci/set_lance_version.py
@@ -47,10 +47,10 @@ def extract_features(line: str) -> list:
    """
    import re

-    match = re.search(r'"features"\s*=\s*\[\s*(.*?)\s*\]', line, re.DOTALL)
+    match = re.search(r'"features"\s*=\s*\[(.*?)\]', line)
    if match:
        features_str = match.group(1)
-        return [f.strip('"') for f in features_str.split(",") if len(f) > 0]
+        return [f.strip('"') for f in features_str.split(",")]
    return []


@@ -63,24 +63,10 @@ def update_cargo_toml(line_updater):
        lines = f.readlines()

    new_lines = []
-    lance_line = ""
-    is_parsing_lance_line = False
    for line in lines:
        if line.startswith("lance"):
            # Update the line using the provided function
-            if line.strip().endswith("}"):
-                new_lines.append(line_updater(line))
-            else:
-                lance_line = line
-                is_parsing_lance_line = True
-        elif is_parsing_lance_line:
-            lance_line += line
-            if line.strip().endswith("}"):
-                new_lines.append(line_updater(lance_line))
-                lance_line = ""
-                is_parsing_lance_line = False
-            else:
-                print("doesn't end with }:", line)
+            new_lines.append(line_updater(line))
        else:
            # Keep the line unchanged
            new_lines.append(line)
--- a/docs/src/guides/tables/merge_insert.md
+++ b/docs/src/guides/tables/merge_insert.md
@@ -71,6 +71,45 @@ with merge insert, enable both `when_matched_update_all()` and
    If a column is nullable, it can be omitted from input data and it will be
    considered `null`. Columns can also be provided in any order.

+### Conditional Updates
+
+You can add a `where` clause to `when_matched_update_all()` to only update rows 
+that meet certain conditions. When using the `where` parameter, you must prefix 
+column names with either `source.` (for the new data) or `target.` (for the 
+existing data) to specify which table you're referencing.
+
+=== "Python"
+
+    ```python
+    # Only update rows where the target's status is 'active'
+    table.merge_insert("id") \
+        .when_matched_update_all(where="target.status = 'active'") \
+        .when_not_matched_insert_all() \
+        .execute(new_data)
+
+    # Only update if the new price is higher than the existing price
+    table.merge_insert("product_id") \
+        .when_matched_update_all(where="source.price > target.price") \
+        .when_not_matched_insert_all() \
+        .execute(new_data)
+    ```
+
+=== "Typescript"
+
+    ```typescript
+    // Only update rows where the target's status is 'active'
+    await table.mergeInsert("id")
+        .whenMatchedUpdateAll({ where: "target.status = 'active'" })
+        .whenNotMatchedInsertAll()
+        .execute(newData);
+
+    // Only update if the new price is higher than the existing price
+    await table.mergeInsert("product_id")
+        .whenMatchedUpdateAll({ where: "source.price > target.price" })
+        .whenNotMatchedInsertAll()
+        .execute(newData);
+    ```
+
 ## Insert-if-not-exists

 To avoid inserting duplicate rows, you can use the insert-if-not-exists command.
--- a/java/core/pom.xml
+++ b/java/core/pom.xml
@@ -8,7 +8,7 @@
    <parent>
        <groupId>com.lancedb</groupId>
        <artifactId>lancedb-parent</artifactId>
-        <version>0.21.1-final.0</version>
+        <version>0.21.1-beta.1</version>
        <relativePath>../pom.xml</relativePath>
    </parent>

--- a/java/pom.xml
+++ b/java/pom.xml
@@ -6,7 +6,7 @@

    <groupId>com.lancedb</groupId>
    <artifactId>lancedb-parent</artifactId>
-    <version>0.21.1-final.0</version>
+    <version>0.21.1-beta.1</version>
    <packaging>pom</packaging>

    <name>LanceDB Parent</name>
--- a/node/package-lock.json
+++ b/node/package-lock.json
@@ -1,12 +1,12 @@
 {
  "name": "vectordb",
-  "version": "0.21.1",
+  "version": "0.21.1-beta.1",
  "lockfileVersion": 3,
  "requires": true,
  "packages": {
    "": {
      "name": "vectordb",
-      "version": "0.21.1",
+      "version": "0.21.1-beta.1",
      "cpu": [
        "x64",
        "arm64"
@@ -52,11 +52,11 @@
        "uuid": "^9.0.0"
      },
      "optionalDependencies": {
-        "@lancedb/vectordb-darwin-arm64": "0.21.1",
-        "@lancedb/vectordb-darwin-x64": "0.21.1",
-        "@lancedb/vectordb-linux-arm64-gnu": "0.21.1",
-        "@lancedb/vectordb-linux-x64-gnu": "0.21.1",
-        "@lancedb/vectordb-win32-x64-msvc": "0.21.1"
+        "@lancedb/vectordb-darwin-arm64": "0.21.1-beta.1",
+        "@lancedb/vectordb-darwin-x64": "0.21.1-beta.1",
+        "@lancedb/vectordb-linux-arm64-gnu": "0.21.1-beta.1",
+        "@lancedb/vectordb-linux-x64-gnu": "0.21.1-beta.1",
+        "@lancedb/vectordb-win32-x64-msvc": "0.21.1-beta.1"
      },
      "peerDependencies": {
        "@apache-arrow/ts": "^14.0.2",
@@ -326,71 +326,6 @@
        "@jridgewell/sourcemap-codec": "^1.4.10"
      }
    },
-    "node_modules/@lancedb/vectordb-darwin-arm64": {
-      "version": "0.21.1",
-      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.21.1.tgz",
-      "integrity": "sha512-eXeOKgK5s7MSKDzA7Hl4/9E2X8tWWMNV7UJiFdwxrUcop86tM5ePBi8tApRnaQ3wBXrs99XTVBJ7+j+2gzilVA==",
-      "cpu": [
-        "arm64"
-      ],
-      "license": "Apache-2.0",
-      "optional": true,
-      "os": [
-        "darwin"
-      ]
-    },
-    "node_modules/@lancedb/vectordb-darwin-x64": {
-      "version": "0.21.1",
-      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.21.1.tgz",
-      "integrity": "sha512-vLoPWfg7OPw5vazLH5/YD/yQkZiTiPniuQgsH+xTodRfLf926lny53G7LQ6nFXNKIzX/jYKtg7AfMU8IcDLSEQ==",
-      "cpu": [
-        "x64"
-      ],
-      "license": "Apache-2.0",
-      "optional": true,
-      "os": [
-        "darwin"
-      ]
-    },
-    "node_modules/@lancedb/vectordb-linux-arm64-gnu": {
-      "version": "0.21.1",
-      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.21.1.tgz",
-      "integrity": "sha512-IMAxtXj5aHCv9peziN77IxQpkYFj83KvI8zQCHzbMMXv7BspkhAd0PaUViqHqtTf2TUHjYQ66a7clZrEn+xQuQ==",
-      "cpu": [
-        "arm64"
-      ],
-      "license": "Apache-2.0",
-      "optional": true,
-      "os": [
-        "linux"
-      ]
-    },
-    "node_modules/@lancedb/vectordb-linux-x64-gnu": {
-      "version": "0.21.1",
-      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.21.1.tgz",
-      "integrity": "sha512-9oPOxBsYGngIhtC/oC+fQ9V0w9mgFuj2Wyler8f5UYQdiAutsTNyOUA+XjtcROjVZrZ5oUeIrvOQSte9BbpRTg==",
-      "cpu": [
-        "x64"
-      ],
-      "license": "Apache-2.0",
-      "optional": true,
-      "os": [
-        "linux"
-      ]
-    },
-    "node_modules/@lancedb/vectordb-win32-x64-msvc": {
-      "version": "0.21.1",
-      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.21.1.tgz",
-      "integrity": "sha512-XqDXFLfdjNpDZ5jaqLerdx+sDU4YLuPK3VF4TowwcOlWDrUtI/L1lAyCaKxcyz1qE3VGuZvhNU89N5ioEICb4Q==",
-      "cpu": [
-        "x64"
-      ],
-      "license": "Apache-2.0",
-      "optional": true,
-      "os": [
-        "win32"
-      ]
-    },
    "node_modules/@neon-rs/cli": {
      "version": "0.0.160",
      "resolved": "https://registry.npmjs.org/@neon-rs/cli/-/cli-0.0.160.tgz",
--- a/node/package.json
+++ b/node/package.json
@@ -1,6 +1,6 @@
 {
  "name": "vectordb",
-  "version": "0.21.1",
+  "version": "0.21.1-beta.1",
  "description": " Serverless, low-latency vector database for AI applications",
  "private": false,
  "main": "dist/index.js",
@@ -89,10 +89,10 @@
    }
  },
  "optionalDependencies": {
-    "@lancedb/vectordb-darwin-x64": "0.21.1",
-    "@lancedb/vectordb-darwin-arm64": "0.21.1",
-    "@lancedb/vectordb-linux-x64-gnu": "0.21.1",
-    "@lancedb/vectordb-linux-arm64-gnu": "0.21.1",
-    "@lancedb/vectordb-win32-x64-msvc": "0.21.1"
+    "@lancedb/vectordb-darwin-x64": "0.21.1-beta.1",
+    "@lancedb/vectordb-darwin-arm64": "0.21.1-beta.1",
+    "@lancedb/vectordb-linux-x64-gnu": "0.21.1-beta.1",
+    "@lancedb/vectordb-linux-arm64-gnu": "0.21.1-beta.1",
+    "@lancedb/vectordb-win32-x64-msvc": "0.21.1-beta.1"
  }
 }
--- a/nodejs/Cargo.toml
+++ b/nodejs/Cargo.toml
@@ -1,7 +1,7 @@
 [package]
 name = "lancedb-nodejs"
 edition.workspace = true
-version = "0.21.1"
+version = "0.21.1-beta.1"
 license.workspace = true
 description.workspace = true
 repository.workspace = true
--- a/nodejs/test/table.test.ts
+++ b/nodejs/test/table.test.ts
@@ -1706,60 +1706,6 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
      expect(mustNotResults.length).toBe(1);
    });

-    test("full text search ngram", async () => {
-      const db = await connect(tmpDir.name);
-      const data = [
-        { text: "hello world", vector: [0.1, 0.2, 0.3] },
-        { text: "lance database", vector: [0.4, 0.5, 0.6] },
-        { text: "lance is cool", vector: [0.7, 0.8, 0.9] },
-      ];
-      const table = await db.createTable("test", data);
-      await table.createIndex("text", {
-        config: Index.fts({ baseTokenizer: "ngram" }),
-      });
-
-      const results = await table.search("lan").toArray();
-      expect(results.length).toBe(2);
-      const resultSet = new Set(results.map((r) => r.text));
-      expect(resultSet.has("lance database")).toBe(true);
-      expect(resultSet.has("lance is cool")).toBe(true);
-
-      const results2 = await table.search("nce").toArray(); // spellchecker:disable-line
-      expect(results2.length).toBe(2);
-      const resultSet2 = new Set(results2.map((r) => r.text));
-      expect(resultSet2.has("lance database")).toBe(true);
-      expect(resultSet2.has("lance is cool")).toBe(true);
-
-      // the default min_ngram_length is 3, so "la" should not match
-      const results3 = await table.search("la").toArray();
-      expect(results3.length).toBe(0);
-
-      // test setting min_ngram_length and prefix_only
-      await table.createIndex("text", {
-        config: Index.fts({
-          baseTokenizer: "ngram",
-          ngramMinLength: 2,
-          prefixOnly: true,
-        }),
-        replace: true,
-      });
-
-      const results4 = await table.search("lan").toArray();
-      expect(results4.length).toBe(2);
-      const resultSet4 = new Set(results4.map((r) => r.text));
-      expect(resultSet4.has("lance database")).toBe(true);
-      expect(resultSet4.has("lance is cool")).toBe(true);
-
-      const results5 = await table.search("nce").toArray(); // spellchecker:disable-line
-      expect(results5.length).toBe(0);
-
-      const results6 = await table.search("la").toArray();
-      expect(results6.length).toBe(2);
-      const resultSet6 = new Set(results6.map((r) => r.text));
-      expect(resultSet6.has("lance database")).toBe(true);
-      expect(resultSet6.has("lance is cool")).toBe(true);
-    });
-
    test.each([
      [0.4, 0.5, 0.599], // number[]
      Float32Array.of(0.4, 0.5, 0.599), // Float32Array
--- a/nodejs/lancedb/indices.ts
+++ b/nodejs/lancedb/indices.ts
@@ -439,7 +439,7 @@ export interface FtsOptions {
   *
   * "raw" - Raw tokenizer. This tokenizer does not split the text into tokens and indexes the entire text as a single token.
   */
-  baseTokenizer?: "simple" | "whitespace" | "raw" | "ngram";
+  baseTokenizer?: "simple" | "whitespace" | "raw";

  /**
   * language for stemming and stop words
@@ -472,21 +472,6 @@ export interface FtsOptions {
   * whether to remove punctuation
   */
  asciiFolding?: boolean;
-
-  /**
-   * ngram min length
-   */
-  ngramMinLength?: number;
-
-  /**
-   * ngram max length
-   */
-  ngramMaxLength?: number;
-
-  /**
-   * whether to only index the prefix of the token for ngram tokenizer
-   */
-  prefixOnly?: boolean;
 }

 export class Index {
@@ -623,9 +608,6 @@ export class Index {
        options?.stem,
        options?.removeStopWords,
        options?.asciiFolding,
-        options?.ngramMinLength,
-        options?.ngramMaxLength,
-        options?.prefixOnly,
      ),
    );
  }
--- a/nodejs/lancedb/table.ts
+++ b/nodejs/lancedb/table.ts
@@ -75,10 +75,10 @@ export interface OptimizeOptions {
   * // Delete all versions older than 1 day
   * const olderThan = new Date();
   * olderThan.setDate(olderThan.getDate() - 1));
-   * tbl.optimize({cleanupOlderThan: olderThan});
+   * tbl.cleanupOlderVersions(olderThan);
   *
   * // Delete all versions except the current version
-   * tbl.optimize({cleanupOlderThan: new Date()});
+   * tbl.cleanupOlderVersions(new Date());
   */
  cleanupOlderThan: Date;
  deleteUnverified: boolean;
--- a/nodejs/npm/darwin-arm64/package.json
+++ b/nodejs/npm/darwin-arm64/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-darwin-arm64",
-	"version": "0.21.1",
+	"version": "0.21.1-beta.1",
 	"os": ["darwin"],
 	"cpu": ["arm64"],
 	"main": "lancedb.darwin-arm64.node",
--- a/nodejs/npm/darwin-x64/package.json
+++ b/nodejs/npm/darwin-x64/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-darwin-x64",
-	"version": "0.21.1",
+	"version": "0.21.1-beta.1",
 	"os": ["darwin"],
 	"cpu": ["x64"],
 	"main": "lancedb.darwin-x64.node",
--- a/nodejs/npm/linux-arm64-gnu/package.json
+++ b/nodejs/npm/linux-arm64-gnu/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-linux-arm64-gnu",
-	"version": "0.21.1",
+	"version": "0.21.1-beta.1",
 	"os": ["linux"],
 	"cpu": ["arm64"],
 	"main": "lancedb.linux-arm64-gnu.node",
--- a/nodejs/npm/linux-arm64-musl/package.json
+++ b/nodejs/npm/linux-arm64-musl/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-linux-arm64-musl",
-	"version": "0.21.1",
+	"version": "0.21.1-beta.1",
 	"os": ["linux"],
 	"cpu": ["arm64"],
 	"main": "lancedb.linux-arm64-musl.node",
--- a/nodejs/npm/linux-x64-gnu/package.json
+++ b/nodejs/npm/linux-x64-gnu/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-linux-x64-gnu",
-	"version": "0.21.1",
+	"version": "0.21.1-beta.1",
 	"os": ["linux"],
 	"cpu": ["x64"],
 	"main": "lancedb.linux-x64-gnu.node",
--- a/nodejs/npm/linux-x64-musl/package.json
+++ b/nodejs/npm/linux-x64-musl/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-linux-x64-musl",
-	"version": "0.21.1",
+	"version": "0.21.1-beta.1",
 	"os": ["linux"],
 	"cpu": ["x64"],
 	"main": "lancedb.linux-x64-musl.node",
--- a/nodejs/npm/win32-arm64-msvc/package.json
+++ b/nodejs/npm/win32-arm64-msvc/package.json
@@ -1,6 +1,6 @@
 {
  "name": "@lancedb/lancedb-win32-arm64-msvc",
-  "version": "0.21.1",
+  "version": "0.21.1-beta.1",
  "os": [
    "win32"
  ],
--- a/nodejs/npm/win32-x64-msvc/package.json
+++ b/nodejs/npm/win32-x64-msvc/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-win32-x64-msvc",
-	"version": "0.21.1",
+	"version": "0.21.1-beta.1",
 	"os": ["win32"],
 	"cpu": ["x64"],
 	"main": "lancedb.win32-x64-msvc.node",
--- a/nodejs/package-lock.json
+++ b/nodejs/package-lock.json
@@ -1,12 +1,12 @@
 {
  "name": "@lancedb/lancedb",
-  "version": "0.21.1",
+  "version": "0.21.1-beta.1",
  "lockfileVersion": 3,
  "requires": true,
  "packages": {
    "": {
      "name": "@lancedb/lancedb",
-      "version": "0.21.1",
+      "version": "0.21.1-beta.1",
      "cpu": [
        "x64",
        "arm64"
--- a/nodejs/package.json
+++ b/nodejs/package.json
@@ -11,7 +11,7 @@
    "ann"
  ],
  "private": false,
-  "version": "0.21.1",
+  "version": "0.21.1-beta.1",
  "main": "dist/index.js",
  "exports": {
    ".": "./dist/index.js",
--- a/nodejs/src/index.rs
+++ b/nodejs/src/index.rs
@@ -123,9 +123,6 @@ impl Index {
        stem: Option<bool>,
        remove_stop_words: Option<bool>,
        ascii_folding: Option<bool>,
-        ngram_min_length: Option<u32>,
-        ngram_max_length: Option<u32>,
-        prefix_only: Option<bool>,
    ) -> Self {
        let mut opts = FtsIndexBuilder::default();
        if let Some(with_position) = with_position {
@@ -152,15 +149,6 @@ impl Index {
        if let Some(ascii_folding) = ascii_folding {
            opts = opts.ascii_folding(ascii_folding);
        }
-        if let Some(ngram_min_length) = ngram_min_length {
-            opts = opts.ngram_min_length(ngram_min_length);
-        }
-        if let Some(ngram_max_length) = ngram_max_length {
-            opts = opts.ngram_max_length(ngram_max_length);
-        }
-        if let Some(prefix_only) = prefix_only {
-            opts = opts.ngram_prefix_only(prefix_only);
-        }

        Self {
            inner: Mutex::new(Some(LanceDbIndex::FTS(opts))),
--- a/python/.bumpversion.toml
+++ b/python/.bumpversion.toml
@@ -1,5 +1,5 @@
 [tool.bumpversion]
-current_version = "0.24.2-beta.0"
+current_version = "0.24.1-beta.1"
 parse = """(?x)
    (?P<major>0|[1-9]\\d*)\\.
    (?P<minor>0|[1-9]\\d*)\\.
--- a/python/Cargo.toml
+++ b/python/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "lancedb-python"
-version = "0.24.2-beta.0"
+version = "0.24.1-beta.1"
 edition.workspace = true
 description = "Python bindings for LanceDB"
 license.workspace = true
--- a/python/python/lancedb/index.py
+++ b/python/python/lancedb/index.py
@@ -137,9 +137,6 @@ class FTS:
    stem: bool = True
    remove_stop_words: bool = True
    ascii_folding: bool = True
-    ngram_min_length: int = 3
-    ngram_max_length: int = 3
-    prefix_only: bool = False


@dataclass
--- a/python/python/lancedb/merge.py
+++ b/python/python/lancedb/merge.py
@@ -45,6 +45,16 @@ class LanceMergeInsertBuilder(object):
        If there are multiple matches then the behavior is undefined.
        Currently this causes multiple copies of the row to be created
        but that behavior is subject to change.
+
+        Parameters
+        ----------
+        where : Optional[str], default None
+            A SQL filter expression to apply to matched rows. The filter must
+            specify whether you are referencing the source table (new data) or
+            the target table (existing data) by prefixing column names with
+            "source." or "target." respectively.
+
+            Example: "target.status = 'active'" or "source.price > target.price"
        """
        self._when_matched_update_all = True
        self._when_matched_update_all_condition = where
--- a/python/python/lancedb/query.py
+++ b/python/python/lancedb/query.py
@@ -1374,8 +1374,6 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
        if query_string is not None and not isinstance(query_string, str):
            raise ValueError("Reranking currently only supports string queries")
        self._str_query = query_string if query_string is not None else self._str_query
-        if reranker.score == "all":
-            self.with_row_id(True)
        return self

    def bypass_vector_index(self) -> LanceVectorQueryBuilder:
@@ -1571,8 +1569,6 @@ class LanceFtsQueryBuilder(LanceQueryBuilder):
            The LanceQueryBuilder object.
        """
        self._reranker = reranker
-        if reranker.score == "all":
-            self.with_row_id(True)
        return self


@@ -1849,8 +1845,6 @@ class LanceHybridQueryBuilder(LanceQueryBuilder):

        self._norm = normalize
        self._reranker = reranker
-        if reranker.score == "all":
-            self.with_row_id(True)

        return self

--- a/python/python/lancedb/remote/table.py
+++ b/python/python/lancedb/remote/table.py
@@ -158,9 +158,6 @@ class RemoteTable(Table):
        stem: bool = True,
        remove_stop_words: bool = True,
        ascii_folding: bool = True,
-        ngram_min_length: int = 3,
-        ngram_max_length: int = 3,
-        prefix_only: bool = False,
    ):
        config = FTS(
            with_position=with_position,
@@ -171,9 +168,6 @@ class RemoteTable(Table):
            stem=stem,
            remove_stop_words=remove_stop_words,
            ascii_folding=ascii_folding,
-            ngram_min_length=ngram_min_length,
-            ngram_max_length=ngram_max_length,
-            prefix_only=prefix_only,
        )
        LOOP.run(
            self._table.create_index(
--- a/python/python/lancedb/rerankers/answerdotai.py
+++ b/python/python/lancedb/rerankers/answerdotai.py
@@ -74,7 +74,9 @@ class AnswerdotaiRerankers(Reranker):
        if self.score == "relevance":
            combined_results = self._keep_relevance_score(combined_results)
        elif self.score == "all":
-            combined_results = self._merge_and_keep_scores(vector_results, fts_results)
+            raise NotImplementedError(
+                "Answerdotai Reranker does not support score='all' yet"
+            )
        combined_results = combined_results.sort_by(
            [("_relevance_score", "descending")]
        )
--- a/python/python/lancedb/rerankers/base.py
+++ b/python/python/lancedb/rerankers/base.py
@@ -232,39 +232,6 @@ class Reranker(ABC):

        return deduped_table

-    def _merge_and_keep_scores(self, vector_results: pa.Table, fts_results: pa.Table):
-        """
-        Merge the results from the vector and FTS search and keep the scores.
-        This op is slower than just keeping relevance score but can be useful
-        for debugging.
-        """
-        # add nulls to fts results for _distance
-        if "_distance" not in fts_results.column_names:
-            fts_results = fts_results.append_column(
-                "_distance",
-                pa.array([None] * len(fts_results), type=pa.float32()),
-            )
-        # add nulls to vector results for _score
-        if "_score" not in vector_results.column_names:
-            vector_results = vector_results.append_column(
-                "_score",
-                pa.array([None] * len(vector_results), type=pa.float32()),
-            )
-
-        # combine them and fill the scores
-        vector_results_dict = {row["_rowid"]: row for row in vector_results.to_pylist()}
-        fts_results_dict = {row["_rowid"]: row for row in fts_results.to_pylist()}
-
-        # merge them into vector_results
-        for key, value in fts_results_dict.items():
-            if key in vector_results_dict:
-                vector_results_dict[key]["_score"] = value["_score"]
-            else:
-                vector_results_dict[key] = value
-
-        combined = pa.Table.from_pylist(list(vector_results_dict.values()))
-        return combined
-
    def _keep_relevance_score(self, combined_results: pa.Table):
        if self.score == "relevance":
            if "_score" in combined_results.column_names:
--- a/python/python/lancedb/rerankers/cohere.py
+++ b/python/python/lancedb/rerankers/cohere.py
@@ -92,14 +92,14 @@ class CohereReranker(Reranker):
        vector_results: pa.Table,
        fts_results: pa.Table,
    ):
-        if self.score == "all":
-            combined_results = self._merge_and_keep_scores(vector_results, fts_results)
-        else:
-            combined_results = self.merge_results(vector_results, fts_results)
+        combined_results = self.merge_results(vector_results, fts_results)
        combined_results = self._rerank(combined_results, query)
        if self.score == "relevance":
            combined_results = self._keep_relevance_score(combined_results)
-
+        elif self.score == "all":
+            raise NotImplementedError(
+                "return_score='all' not implemented for cohere reranker"
+            )
        return combined_results

    def rerank_vector(self, query: str, vector_results: pa.Table):
--- a/python/python/lancedb/rerankers/cross_encoder.py
+++ b/python/python/lancedb/rerankers/cross_encoder.py
@@ -81,15 +81,15 @@ class CrossEncoderReranker(Reranker):
        vector_results: pa.Table,
        fts_results: pa.Table,
    ):
-        if self.score == "all":
-            combined_results = self._merge_and_keep_scores(vector_results, fts_results)
-        else:
-            combined_results = self.merge_results(vector_results, fts_results)
+        combined_results = self.merge_results(vector_results, fts_results)
        combined_results = self._rerank(combined_results, query)
        # sort the results by _score
        if self.score == "relevance":
            combined_results = self._keep_relevance_score(combined_results)
-
+        elif self.score == "all":
+            raise NotImplementedError(
+                "return_score='all' not implemented for CrossEncoderReranker"
+            )
        combined_results = combined_results.sort_by(
            [("_relevance_score", "descending")]
        )
--- a/python/python/lancedb/rerankers/jinaai.py
+++ b/python/python/lancedb/rerankers/jinaai.py
@@ -97,14 +97,14 @@ class JinaReranker(Reranker):
        vector_results: pa.Table,
        fts_results: pa.Table,
    ):
-        if self.score == "all":
-            combined_results = self._merge_and_keep_scores(vector_results, fts_results)
-        else:
-            combined_results = self.merge_results(vector_results, fts_results)
+        combined_results = self.merge_results(vector_results, fts_results)
        combined_results = self._rerank(combined_results, query)
        if self.score == "relevance":
            combined_results = self._keep_relevance_score(combined_results)
-
+        elif self.score == "all":
+            raise NotImplementedError(
+                "return_score='all' not implemented for JinaReranker"
+            )
        return combined_results

    def rerank_vector(self, query: str, vector_results: pa.Table):
--- a/python/python/lancedb/rerankers/openai.py
+++ b/python/python/lancedb/rerankers/openai.py
@@ -88,13 +88,14 @@ class OpenaiReranker(Reranker):
        vector_results: pa.Table,
        fts_results: pa.Table,
    ):
-        if self.score == "all":
-            combined_results = self._merge_and_keep_scores(vector_results, fts_results)
-        else:
-            combined_results = self.merge_results(vector_results, fts_results)
+        combined_results = self.merge_results(vector_results, fts_results)
        combined_results = self._rerank(combined_results, query)
        if self.score == "relevance":
            combined_results = self._keep_relevance_score(combined_results)
+        elif self.score == "all":
+            raise NotImplementedError(
+                "OpenAI Reranker does not support score='all' yet"
+            )

        combined_results = combined_results.sort_by(
            [("_relevance_score", "descending")]
--- a/python/python/lancedb/rerankers/voyageai.py
+++ b/python/python/lancedb/rerankers/voyageai.py
@@ -94,14 +94,14 @@ class VoyageAIReranker(Reranker):
        vector_results: pa.Table,
        fts_results: pa.Table,
    ):
-        if self.score == "all":
-            combined_results = self._merge_and_keep_scores(vector_results, fts_results)
-        else:
-            combined_results = self.merge_results(vector_results, fts_results)
+        combined_results = self.merge_results(vector_results, fts_results)
        combined_results = self._rerank(combined_results, query)
        if self.score == "relevance":
            combined_results = self._keep_relevance_score(combined_results)
-
+        elif self.score == "all":
+            raise NotImplementedError(
+                "return_score='all' not implemented for voyageai reranker"
+            )
        return combined_results

    def rerank_vector(self, query: str, vector_results: pa.Table):
--- a/python/python/lancedb/table.py
+++ b/python/python/lancedb/table.py
@@ -838,9 +838,6 @@ class Table(ABC):
        stem: bool = True,
        remove_stop_words: bool = True,
        ascii_folding: bool = True,
-        ngram_min_length: int = 3,
-        ngram_max_length: int = 3,
-        prefix_only: bool = False,
        wait_timeout: Optional[timedelta] = None,
    ):
        """Create a full-text search index on the table.
@@ -880,7 +877,6 @@ class Table(ABC):
            - "simple": Splits text by whitespace and punctuation.
            - "whitespace": Split text by whitespace, but not punctuation.
            - "raw": No tokenization. The entire text is treated as a single token.
-            - "ngram": N-Gram tokenizer.
        language : str, default "English"
            The language to use for tokenization.
        max_token_length : int, default 40
@@ -898,12 +894,6 @@ class Table(ABC):
        ascii_folding : bool, default True
            Whether to fold ASCII characters. This converts accented characters to
            their ASCII equivalent. For example, "café" would be converted to "cafe".
-        ngram_min_length: int, default 3
-            The minimum length of an n-gram.
-        ngram_max_length: int, default 3
-            The maximum length of an n-gram.
-        prefix_only: bool, default False
-            Whether to only index the prefix of the token for ngram tokenizer.
        wait_timeout: timedelta, optional
            The timeout to wait if indexing is asynchronous.
        """
@@ -1991,9 +1981,6 @@ class LanceTable(Table):
        stem: bool = True,
        remove_stop_words: bool = True,
        ascii_folding: bool = True,
-        ngram_min_length: int = 3,
-        ngram_max_length: int = 3,
-        prefix_only: bool = False,
    ):
        if not use_tantivy:
            if not isinstance(field_names, str):
@@ -2009,9 +1996,6 @@ class LanceTable(Table):
                    "stem": stem,
                    "remove_stop_words": remove_stop_words,
                    "ascii_folding": ascii_folding,
-                    "ngram_min_length": ngram_min_length,
-                    "ngram_max_length": ngram_max_length,
-                    "prefix_only": prefix_only,
                }
            else:
                tokenizer_configs = self.infer_tokenizer_configs(tokenizer_name)
@@ -2081,9 +2065,6 @@ class LanceTable(Table):
                "stem": False,
                "remove_stop_words": False,
                "ascii_folding": False,
-                "ngram_min_length": 3,
-                "ngram_max_length": 3,
-                "prefix_only": False,
            }
        elif tokenizer_name == "raw":
            return {
@@ -2094,9 +2075,6 @@ class LanceTable(Table):
                "stem": False,
                "remove_stop_words": False,
                "ascii_folding": False,
-                "ngram_min_length": 3,
-                "ngram_max_length": 3,
-                "prefix_only": False,
            }
        elif tokenizer_name == "whitespace":
            return {
@@ -2107,9 +2085,6 @@ class LanceTable(Table):
                "stem": False,
                "remove_stop_words": False,
                "ascii_folding": False,
-                "ngram_min_length": 3,
-                "ngram_max_length": 3,
-                "prefix_only": False,
            }

        # or it's with language stemming with pattern like "en_stem"
@@ -2128,9 +2103,6 @@ class LanceTable(Table):
            "stem": True,
            "remove_stop_words": False,
            "ascii_folding": False,
-            "ngram_min_length": 3,
-            "ngram_max_length": 3,
-            "prefix_only": False,
        }

    def add(
--- a/python/python/lancedb/types.py
+++ b/python/python/lancedb/types.py
@@ -25,4 +25,4 @@ IndexType = Literal[
 ]

 # Tokenizer literals
-BaseTokenizerType = Literal["simple", "raw", "whitespace", "ngram"]
+BaseTokenizerType = Literal["simple", "raw", "whitespace"]
--- a/python/python/tests/test_fts.py
+++ b/python/python/tests/test_fts.py
@@ -669,46 +669,3 @@ def test_fts_on_list(mem_db: DBConnection):

    res = table.search(PhraseQuery("lance database", "text")).limit(5).to_list()
    assert len(res) == 2
-
-
-def test_fts_ngram(mem_db: DBConnection):
-    data = pa.table({"text": ["hello world", "lance database", "lance is cool"]})
-    table = mem_db.create_table("test", data=data)
-    table.create_fts_index("text", use_tantivy=False, base_tokenizer="ngram")
-
-    results = table.search("lan", query_type="fts").limit(10).to_list()
-    assert len(results) == 2
-    assert set(r["text"] for r in results) == {"lance database", "lance is cool"}
-
-    results = (
-        table.search("nce", query_type="fts").limit(10).to_list()
-    )  # spellchecker:disable-line
-    assert len(results) == 2
-    assert set(r["text"] for r in results) == {"lance database", "lance is cool"}
-
-    # the default min_ngram_length is 3, so "la" should not match
-    results = table.search("la", query_type="fts").limit(10).to_list()
-    assert len(results) == 0
-
-    # test setting min_ngram_length and prefix_only
-    table.create_fts_index(
-        "text",
-        use_tantivy=False,
-        base_tokenizer="ngram",
-        replace=True,
-        ngram_min_length=2,
-        prefix_only=True,
-    )
-
-    results = table.search("lan", query_type="fts").limit(10).to_list()
-    assert len(results) == 2
-    assert set(r["text"] for r in results) == {"lance database", "lance is cool"}
-
-    results = (
-        table.search("nce", query_type="fts").limit(10).to_list()
-    )  # spellchecker:disable-line
-    assert len(results) == 0
-
-    results = table.search("la", query_type="fts").limit(10).to_list()
-    assert len(results) == 2
-    assert set(r["text"] for r in results) == {"lance database", "lance is cool"}
--- a/python/python/tests/test_query.py
+++ b/python/python/tests/test_query.py
@@ -272,9 +272,7 @@ async def test_distance_range_with_new_rows_async():
    # append more rows so that execution plan would be mixed with ANN & Flat KNN
    new_data = pa.table(
        {
-            "vector": pa.FixedShapeTensorArray.from_numpy_ndarray(
-                np.random.rand(4, 2) + 1
-            ),
+            "vector": pa.FixedShapeTensorArray.from_numpy_ndarray(np.random.rand(4, 2)),
        }
    )
    await table.add(new_data)
--- a/python/python/tests/test_rerankers.py
+++ b/python/python/tests/test_rerankers.py
@@ -499,19 +499,3 @@ def test_empty_result_reranker():
            .rerank(reranker)
            .to_arrow()
        )
-
-
-@pytest.mark.parametrize("use_tantivy", [True, False])
-def test_cross_encoder_reranker_return_all(tmp_path, use_tantivy):
-    pytest.importorskip("sentence_transformers")
-    reranker = CrossEncoderReranker(return_score="all")
-    table, schema = get_test_table(tmp_path, use_tantivy)
-    query = "single player experience"
-    result = (
-        table.search(query, query_type="hybrid", vector_column_name="vector")
-        .rerank(reranker=reranker)
-        .to_arrow()
-    )
-    assert "_relevance_score" in result.column_names
-    assert "_score" in result.column_names
-    assert "_distance" in result.column_names
--- a/python/src/index.rs
+++ b/python/src/index.rs
@@ -47,10 +47,7 @@ pub fn extract_index_params(source: &Option<Bound<'_, PyAny>>) -> PyResult<Lance
                    .max_token_length(params.max_token_length)
                    .remove_stop_words(params.remove_stop_words)
                    .stem(params.stem)
-                    .ascii_folding(params.ascii_folding)
-                    .ngram_min_length(params.ngram_min_length)
-                    .ngram_max_length(params.ngram_max_length)
-                    .ngram_prefix_only(params.prefix_only);
+                    .ascii_folding(params.ascii_folding);
                Ok(LanceDbIndex::FTS(inner_opts))
            },
            "IvfFlat" => {
@@ -133,9 +130,6 @@ struct FtsParams {
    stem: bool,
    remove_stop_words: bool,
    ascii_folding: bool,
-    ngram_min_length: u32,
-    ngram_max_length: u32,
-    prefix_only: bool,
 }

 #[derive(FromPyObject)]
--- a/rust/ffi/node/Cargo.toml
+++ b/rust/ffi/node/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "lancedb-node"
-version = "0.21.1"
+version = "0.21.1-beta.1"
 description = "Serverless, low-latency vector database for AI applications"
 license.workspace = true
 edition.workspace = true
--- a/rust/lancedb/Cargo.toml
+++ b/rust/lancedb/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "lancedb"
-version = "0.21.1"
+version = "0.21.1-beta.1"
 edition.workspace = true
 description = "LanceDB: A serverless, low-latency vector database for AI applications"
 license.workspace = true
--- a/rust/lancedb/src/database/listing.rs
+++ b/rust/lancedb/src/database/listing.rs
@@ -8,7 +8,7 @@ use std::path::Path;
 use std::{collections::HashMap, sync::Arc};

 use lance::dataset::{ReadParams, WriteMode};
-use lance::io::{ObjectStore, ObjectStoreParams, WrappingObjectStore};
+use lance::io::{ObjectStore, ObjectStoreParams, ObjectStoreRegistry, WrappingObjectStore};
 use lance_datafusion::utils::StreamingWriteSource;
 use lance_encoding::version::LanceFileVersion;
 use lance_table::io::commit::commit_handler_from_url;
@@ -217,9 +217,6 @@ pub struct ListingDatabase {

    // Options for tables created by this connection
    new_table_config: NewTableConfig,
-
-    // Session for object stores and caching
-    session: Arc<lance::session::Session>,
 }

 impl std::fmt::Display for ListingDatabase {
@@ -316,17 +313,13 @@ impl ListingDatabase {

                let plain_uri = url.to_string();

-                let session = Arc::new(lance::session::Session::default());
+                let registry = Arc::new(ObjectStoreRegistry::default());
                let os_params = ObjectStoreParams {
                    storage_options: Some(options.storage_options.clone()),
                    ..Default::default()
                };
-                let (object_store, base_path) = ObjectStore::from_uri_and_params(
-                    session.store_registry(),
-                    &plain_uri,
-                    &os_params,
-                )
-                .await?;
+                let (object_store, base_path) =
+                    ObjectStore::from_uri_and_params(registry, &plain_uri, &os_params).await?;
                if object_store.is_local() {
                    Self::try_create_dir(&plain_uri).context(CreateDirSnafu { path: plain_uri })?;
                }
@@ -349,7 +342,6 @@ impl ListingDatabase {
                    read_consistency_interval: request.read_consistency_interval,
                    storage_options: options.storage_options,
                    new_table_config: options.new_table_config,
-                    session,
                })
            }
            Err(_) => {
@@ -368,13 +360,7 @@ impl ListingDatabase {
        read_consistency_interval: Option<std::time::Duration>,
        new_table_config: NewTableConfig,
    ) -> Result<Self> {
-        let session = Arc::new(lance::session::Session::default());
-        let (object_store, base_path) = ObjectStore::from_uri_and_params(
-            session.store_registry(),
-            path,
-            &ObjectStoreParams::default(),
-        )
-        .await?;
+        let (object_store, base_path) = ObjectStore::from_uri(path).await?;
        if object_store.is_local() {
            Self::try_create_dir(path).context(CreateDirSnafu { path })?;
        }
@@ -388,7 +374,6 @@ impl ListingDatabase {
            read_consistency_interval,
            storage_options: HashMap::new(),
            new_table_config,
-            session,
        })
    }

@@ -456,128 +441,6 @@ impl ListingDatabase {
        }
        Ok(())
    }
-
-    /// Inherit storage options from the connection into the target map
-    fn inherit_storage_options(&self, target: &mut HashMap<String, String>) {
-        for (key, value) in self.storage_options.iter() {
-            if !target.contains_key(key) {
-                target.insert(key.clone(), value.clone());
-            }
-        }
-    }
-
-    /// Extract storage option overrides from the request
-    fn extract_storage_overrides(
-        &self,
-        request: &CreateTableRequest,
-    ) -> Result<(Option<LanceFileVersion>, Option<bool>)> {
-        let storage_options = request
-            .write_options
-            .lance_write_params
-            .as_ref()
-            .and_then(|p| p.store_params.as_ref())
-            .and_then(|sp| sp.storage_options.as_ref());
-
-        let storage_version_override = storage_options
-            .and_then(|opts| opts.get(OPT_NEW_TABLE_STORAGE_VERSION))
-            .map(|s| s.parse::<LanceFileVersion>())
-            .transpose()?;
-
-        let v2_manifest_override = storage_options
-            .and_then(|opts| opts.get(OPT_NEW_TABLE_V2_MANIFEST_PATHS))
-            .map(|s| s.parse::<bool>())
-            .transpose()
-            .map_err(|_| Error::InvalidInput {
-                message: "enable_v2_manifest_paths must be a boolean".to_string(),
-            })?;
-
-        Ok((storage_version_override, v2_manifest_override))
-    }
-
-    /// Prepare write parameters for table creation
-    fn prepare_write_params(
-        &self,
-        request: &CreateTableRequest,
-        storage_version_override: Option<LanceFileVersion>,
-        v2_manifest_override: Option<bool>,
-    ) -> lance::dataset::WriteParams {
-        let mut write_params = request
-            .write_options
-            .lance_write_params
-            .clone()
-            .unwrap_or_default();
-
-        // Only modify the storage options if we actually have something to
-        // inherit. There is a difference between storage_options=None and
-        // storage_options=Some({}). Using storage_options=None will cause the
-        // connection's session store registry to be used. Supplying Some({})
-        // will cause a new connection to be created, and that connection will
-        // be dropped from the cache when python GCs the table object, which
-        // confounds reuse across tables.
-        if !self.storage_options.is_empty() {
-            let storage_options = write_params
-                .store_params
-                .get_or_insert_with(Default::default)
-                .storage_options
-                .get_or_insert_with(Default::default);
-            self.inherit_storage_options(storage_options);
-        }
-
-        write_params.data_storage_version = self
-            .new_table_config
-            .data_storage_version
-            .or(storage_version_override);
-
-        if let Some(enable_v2_manifest_paths) = self
-            .new_table_config
-            .enable_v2_manifest_paths
-            .or(v2_manifest_override)
-        {
-            write_params.enable_v2_manifest_paths = enable_v2_manifest_paths;
-        }
-
-        if matches!(&request.mode, CreateTableMode::Overwrite) {
-            write_params.mode = WriteMode::Overwrite;
-        }
-
-        write_params.session = Some(self.session.clone());
-
-        write_params
-    }
-
-    /// Handle the case where table already exists based on the create mode
-    async fn handle_table_exists(
-        &self,
-        table_name: &str,
-        mode: CreateTableMode,
-        data_schema: &arrow_schema::Schema,
-    ) -> Result<Arc<dyn BaseTable>> {
-        match mode {
-            CreateTableMode::Create => Err(Error::TableAlreadyExists {
-                name: table_name.to_string(),
-            }),
-            CreateTableMode::ExistOk(callback) => {
-                let req = OpenTableRequest {
-                    name: table_name.to_string(),
-                    index_cache_size: None,
-                    lance_read_params: None,
-                };
-                let req = (callback)(req);
-                let table = self.open_table(req).await?;
-
-                let table_schema = table.schema().await?;
-
-                if table_schema.as_ref() != data_schema {
-                    return Err(Error::Schema {
-                        message: "Provided schema does not match existing table schema".to_string(),
-                    });
-                }
-
-                Ok(table)
-            }
-            CreateTableMode::Overwrite => unreachable!(),
-        }
-    }
 }

 #[async_trait::async_trait]
@@ -612,14 +475,50 @@ impl Database for ListingDatabase {
        Ok(f)
    }

-    async fn create_table(&self, request: CreateTableRequest) -> Result<Arc<dyn BaseTable>> {
+    async fn create_table(&self, mut request: CreateTableRequest) -> Result<Arc<dyn BaseTable>> {
        let table_uri = self.table_uri(&request.name)?;
+        // Inherit storage options from the connection
+        let storage_options = request
+            .write_options
+            .lance_write_params
+            .get_or_insert_with(Default::default)
+            .store_params
+            .get_or_insert_with(Default::default)
+            .storage_options
+            .get_or_insert_with(Default::default);
+        for (key, value) in self.storage_options.iter() {
+            if !storage_options.contains_key(key) {
+                storage_options.insert(key.clone(), value.clone());
+            }
+        }

-        let (storage_version_override, v2_manifest_override) =
-            self.extract_storage_overrides(&request)?;
+        let storage_options = storage_options.clone();

-        let write_params =
-            self.prepare_write_params(&request, storage_version_override, v2_manifest_override);
+        let mut write_params = request.write_options.lance_write_params.unwrap_or_default();
+
+        if let Some(storage_version) = &self.new_table_config.data_storage_version {
+            write_params.data_storage_version = Some(*storage_version);
+        } else {
+            // Allow the user to override the storage version via storage options (backwards compatibility)
+            if let Some(data_storage_version) = storage_options.get(OPT_NEW_TABLE_STORAGE_VERSION) {
+                write_params.data_storage_version = Some(data_storage_version.parse()?);
+            }
+        }
+        if let Some(enable_v2_manifest_paths) = self.new_table_config.enable_v2_manifest_paths {
+            write_params.enable_v2_manifest_paths = enable_v2_manifest_paths;
+        } else {
+            // Allow the user to override the v2 manifest paths setting via storage options (backwards compatibility)
+            if let Some(enable_v2_manifest_paths) = storage_options
+                .get(OPT_NEW_TABLE_V2_MANIFEST_PATHS)
+                .map(|s| s.parse::<bool>().unwrap())
+            {
+                write_params.enable_v2_manifest_paths = enable_v2_manifest_paths;
+            }
+        }
+
+        if matches!(&request.mode, CreateTableMode::Overwrite) {
+            write_params.mode = WriteMode::Overwrite;
+        }

        let data_schema = request.data.arrow_schema();

@@ -634,10 +533,30 @@ impl Database for ListingDatabase {
        .await
        {
            Ok(table) => Ok(Arc::new(table)),
-            Err(Error::TableAlreadyExists { .. }) => {
-                self.handle_table_exists(&request.name, request.mode, &data_schema)
-                    .await
-            }
+            Err(Error::TableAlreadyExists { name }) => match request.mode {
+                CreateTableMode::Create => Err(Error::TableAlreadyExists { name }),
+                CreateTableMode::ExistOk(callback) => {
+                    let req = OpenTableRequest {
+                        name: request.name.clone(),
+                        index_cache_size: None,
+                        lance_read_params: None,
+                    };
+                    let req = (callback)(req);
+                    let table = self.open_table(req).await?;
+
+                    let table_schema = table.schema().await?;
+
+                    if table_schema != data_schema {
+                        return Err(Error::Schema {
+                            message: "Provided schema does not match existing table schema"
+                                .to_string(),
+                        });
+                    }
+
+                    Ok(table)
+                }
+                CreateTableMode::Overwrite => unreachable!(),
+            },
            Err(err) => Err(err),
        }
    }
@@ -645,22 +564,18 @@ impl Database for ListingDatabase {
    async fn open_table(&self, mut request: OpenTableRequest) -> Result<Arc<dyn BaseTable>> {
        let table_uri = self.table_uri(&request.name)?;

-        // Only modify the storage options if we actually have something to
-        // inherit. There is a difference between storage_options=None and
-        // storage_options=Some({}). Using storage_options=None will cause the
-        // connection's session store registry to be used. Supplying Some({})
-        // will cause a new connection to be created, and that connection will
-        // be dropped from the cache when python GCs the table object, which
-        // confounds reuse across tables.
-        if !self.storage_options.is_empty() {
-            let storage_options = request
-                .lance_read_params
-                .get_or_insert_with(Default::default)
-                .store_options
-                .get_or_insert_with(Default::default)
-                .storage_options
-                .get_or_insert_with(Default::default);
-            self.inherit_storage_options(storage_options);
+        // Inherit storage options from the connection
+        let storage_options = request
+            .lance_read_params
+            .get_or_insert_with(Default::default)
+            .store_options
+            .get_or_insert_with(Default::default)
+            .storage_options
+            .get_or_insert_with(Default::default);
+        for (key, value) in self.storage_options.iter() {
+            if !storage_options.contains_key(key) {
+                storage_options.insert(key.clone(), value.clone());
+            }
        }

        // Some ReadParams are exposed in the OpenTableBuilder, but we also
@@ -669,14 +584,13 @@ impl Database for ListingDatabase {
        // If we have a user provided ReadParams use that
        // If we don't then start with the default ReadParams and customize it with
        // the options from the OpenTableBuilder
-        let mut read_params = request.lance_read_params.unwrap_or_else(|| {
+        let read_params = request.lance_read_params.unwrap_or_else(|| {
            let mut default_params = ReadParams::default();
            if let Some(index_cache_size) = request.index_cache_size {
                default_params.index_cache_size = index_cache_size as usize;
            }
            default_params
        });
-        read_params.session(self.session.clone());

        let native_table = Arc::new(
            NativeTable::open_with_params(
--- a/rust/lancedb/tests/object_store_test.rs
+++ b/rust/lancedb/tests/object_store_test.rs
@@ -281,46 +281,6 @@ async fn test_encryption() -> Result<()> {
    Ok(())
 }

-#[tokio::test]
-async fn test_table_storage_options_override() -> Result<()> {
-    // Test that table-level storage options override connection-level options
-    let bucket = S3Bucket::new("test-override").await;
-    let key1 = KMSKey::new().await;
-    let key2 = KMSKey::new().await;
-
-    let uri = format!("s3://{}", bucket.0);
-
-    // Create connection with key1 encryption
-    let db = lancedb::connect(&uri)
-        .storage_options(CONFIG.iter().cloned())
-        .storage_option("aws_server_side_encryption", "aws:kms")
-        .storage_option("aws_sse_kms_key_id", &key1.0)
-        .execute()
-        .await?;
-
-    // Create table overriding with key2 encryption
-    let data = test_data();
-    let data = RecordBatchIterator::new(vec![Ok(data.clone())], data.schema());
-    let _table = db
-        .create_table("test_override", data)
-        .storage_option("aws_sse_kms_key_id", &key2.0)
-        .execute()
-        .await?;
-
-    // Verify objects are encrypted with key2, not key1
-    validate_objects_encrypted(&bucket.0, "test_override", &key2.0).await;
-
-    // Also test that a table created without override uses connection settings
-    let data = test_data();
-    let data = RecordBatchIterator::new(vec![Ok(data.clone())], data.schema());
-    let _table2 = db.create_table("test_inherit", data).execute().await?;
-
-    // Verify this table uses key1 from connection
-    validate_objects_encrypted(&bucket.0, "test_inherit", &key1.0).await;
-
-    Ok(())
-}
-
 struct DynamoDBCommitTable(String);

 impl DynamoDBCommitTable {
Author	SHA1	Message	Date
Xuanwo	16a7e29639	Format python Signed-off-by: Xuanwo <github@xuanwo.io>	2025-07-10 19:11:02 +08:00
Xuanwo	0e7a218d62	docs: Add examples for where in when_matched_update_all Signed-off-by: Xuanwo <github@xuanwo.io>	2025-07-10 19:08:45 +08:00