Compare commits

..

1 Commits

Author SHA1 Message Date
Lance Release
65e7b80d1e Bump version: 0.21.1-beta.2 → 0.21.1 2025-07-10 21:36:31 +00:00
45 changed files with 218 additions and 577 deletions

View File

@@ -1,5 +1,5 @@
[tool.bumpversion]
current_version = "0.21.2-beta.0"
current_version = "0.21.1"
parse = """(?x)
(?P<major>0|[1-9]\\d*)\\.
(?P<minor>0|[1-9]\\d*)\\.

74
Cargo.lock generated
View File

@@ -2840,8 +2840,9 @@ checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c"
[[package]]
name = "fsst"
version = "0.31.2"
source = "git+https://github.com/lancedb/lance.git?tag=v0.31.2-beta.3#6e987921d0efbe42c018047ea45d1b6f624d8280"
version = "0.31.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2afa8e97cfde0f44698a13dc5afc0a29840eb0d252a918986b34dc5baa166d9a"
dependencies = [
"rand 0.8.5",
]
@@ -3930,8 +3931,9 @@ dependencies = [
[[package]]
name = "lance"
version = "0.31.2"
source = "git+https://github.com/lancedb/lance.git?tag=v0.31.2-beta.3#6e987921d0efbe42c018047ea45d1b6f624d8280"
version = "0.31.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8258e6fe0283f6a9764ffe520e3e2e7727b00a7f14f03852b167692608242f0e"
dependencies = [
"arrow",
"arrow-arith",
@@ -3993,8 +3995,9 @@ dependencies = [
[[package]]
name = "lance-arrow"
version = "0.31.2"
source = "git+https://github.com/lancedb/lance.git?tag=v0.31.2-beta.3#6e987921d0efbe42c018047ea45d1b6f624d8280"
version = "0.31.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "135b0fdadc4ada6f8c382379a97f09f6a6186f05b33bd23743a959151a5cf233"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -4011,8 +4014,9 @@ dependencies = [
[[package]]
name = "lance-core"
version = "0.31.2"
source = "git+https://github.com/lancedb/lance.git?tag=v0.31.2-beta.3#6e987921d0efbe42c018047ea45d1b6f624d8280"
version = "0.31.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "83eb8468fcdfe4b4eacbcb1c151a72ac032d9143f4203a7f3bfbde64214a40b3"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -4047,8 +4051,9 @@ dependencies = [
[[package]]
name = "lance-datafusion"
version = "0.31.2"
source = "git+https://github.com/lancedb/lance.git?tag=v0.31.2-beta.3#6e987921d0efbe42c018047ea45d1b6f624d8280"
version = "0.31.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "95a266374d53d37ee4378fbc3e4827931e1119e609bc41b64eb5a5ca93c8e8eb"
dependencies = [
"arrow",
"arrow-array",
@@ -4076,8 +4081,9 @@ dependencies = [
[[package]]
name = "lance-datagen"
version = "0.31.2"
source = "git+https://github.com/lancedb/lance.git?tag=v0.31.2-beta.3#6e987921d0efbe42c018047ea45d1b6f624d8280"
version = "0.31.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3890c9f35c318969735387f154c7633ccd5039e6a9879b9372a4005e43182073"
dependencies = [
"arrow",
"arrow-array",
@@ -4093,8 +4099,9 @@ dependencies = [
[[package]]
name = "lance-encoding"
version = "0.31.2"
source = "git+https://github.com/lancedb/lance.git?tag=v0.31.2-beta.3#6e987921d0efbe42c018047ea45d1b6f624d8280"
version = "0.31.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b68ab7867165826bf893dcff9e2ce565e6a0299862b7315a1c0ef1470a6c9144"
dependencies = [
"arrayref",
"arrow",
@@ -4133,8 +4140,9 @@ dependencies = [
[[package]]
name = "lance-file"
version = "0.31.2"
source = "git+https://github.com/lancedb/lance.git?tag=v0.31.2-beta.3#6e987921d0efbe42c018047ea45d1b6f624d8280"
version = "0.31.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fc5c8e5b19054c74e0bf7fcfa6038bfec0f881209d66e04e41f4a4f2e0272317"
dependencies = [
"arrow-arith",
"arrow-array",
@@ -4168,8 +4176,9 @@ dependencies = [
[[package]]
name = "lance-index"
version = "0.31.2"
source = "git+https://github.com/lancedb/lance.git?tag=v0.31.2-beta.3#6e987921d0efbe42c018047ea45d1b6f624d8280"
version = "0.31.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fa06deb03ad01fb42790f525e60b297ff9011a6590f769f698626a8e5ea53350"
dependencies = [
"arrow",
"arrow-array",
@@ -4223,8 +4232,9 @@ dependencies = [
[[package]]
name = "lance-io"
version = "0.31.2"
source = "git+https://github.com/lancedb/lance.git?tag=v0.31.2-beta.3#6e987921d0efbe42c018047ea45d1b6f624d8280"
version = "0.31.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4ac1794d9fe428e5a75fdef80e259b5d16af7ee5c95b866f1270c78368f206ed"
dependencies = [
"arrow",
"arrow-arith",
@@ -4262,11 +4272,11 @@ dependencies = [
[[package]]
name = "lance-linalg"
version = "0.31.2"
source = "git+https://github.com/lancedb/lance.git?tag=v0.31.2-beta.3#6e987921d0efbe42c018047ea45d1b6f624d8280"
version = "0.31.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "42bfda62395e6123de712a7fbd1a5db0c678e53a6f46f1c33d9729814106e7f1"
dependencies = [
"arrow-array",
"arrow-buffer",
"arrow-ord",
"arrow-schema",
"bitvec",
@@ -4286,8 +4296,9 @@ dependencies = [
[[package]]
name = "lance-table"
version = "0.31.2"
source = "git+https://github.com/lancedb/lance.git?tag=v0.31.2-beta.3#6e987921d0efbe42c018047ea45d1b6f624d8280"
version = "0.31.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6919d1e0f24741a01b3ee65ee57f05e89472b75b6086874936e1f01e456f6f6f"
dependencies = [
"arrow",
"arrow-array",
@@ -4325,8 +4336,9 @@ dependencies = [
[[package]]
name = "lance-testing"
version = "0.31.2"
source = "git+https://github.com/lancedb/lance.git?tag=v0.31.2-beta.3#6e987921d0efbe42c018047ea45d1b6f624d8280"
version = "0.31.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e0ed2c0ac602bdcc17836b8c2c74cbb8fff962570f2df40d6c1d4a540053de44"
dependencies = [
"arrow-array",
"arrow-schema",
@@ -4337,7 +4349,7 @@ dependencies = [
[[package]]
name = "lancedb"
version = "0.21.1"
version = "0.21.1-beta.1"
dependencies = [
"arrow",
"arrow-array",
@@ -4424,7 +4436,7 @@ dependencies = [
[[package]]
name = "lancedb-node"
version = "0.21.1"
version = "0.21.1-beta.1"
dependencies = [
"arrow-array",
"arrow-ipc",
@@ -4449,7 +4461,7 @@ dependencies = [
[[package]]
name = "lancedb-nodejs"
version = "0.21.1"
version = "0.21.1-beta.1"
dependencies = [
"arrow-array",
"arrow-ipc",
@@ -4469,7 +4481,7 @@ dependencies = [
[[package]]
name = "lancedb-python"
version = "0.24.1"
version = "0.24.1-beta.1"
dependencies = [
"arrow",
"env_logger",

View File

@@ -21,16 +21,14 @@ categories = ["database-implementations"]
rust-version = "1.78.0"
[workspace.dependencies]
lance = { "version" = "=0.31.2", "features" = [
"dynamodb",
], "tag" = "v0.31.2-beta.3", "git" = "https://github.com/lancedb/lance.git" }
lance-io = { "version" = "=0.31.2", "tag" = "v0.31.2-beta.3", "git" = "https://github.com/lancedb/lance.git" }
lance-index = { "version" = "=0.31.2", "tag" = "v0.31.2-beta.3", "git" = "https://github.com/lancedb/lance.git" }
lance-linalg = { "version" = "=0.31.2", "tag" = "v0.31.2-beta.3", "git" = "https://github.com/lancedb/lance.git" }
lance-table = { "version" = "=0.31.2", "tag" = "v0.31.2-beta.3", "git" = "https://github.com/lancedb/lance.git" }
lance-testing = { "version" = "=0.31.2", "tag" = "v0.31.2-beta.3", "git" = "https://github.com/lancedb/lance.git" }
lance-datafusion = { "version" = "=0.31.2", "tag" = "v0.31.2-beta.3", "git" = "https://github.com/lancedb/lance.git" }
lance-encoding = { "version" = "=0.31.2", "tag" = "v0.31.2-beta.3", "git" = "https://github.com/lancedb/lance.git" }
lance = { "version" = "=0.31.1", features = ["dynamodb"] }
lance-io = { "version" = "=0.31.1" }
lance-index = { "version" = "=0.31.1" }
lance-linalg = { "version" = "=0.31.1" }
lance-table = { "version" = "=0.31.1" }
lance-testing = { "version" = "=0.31.1" }
lance-datafusion = { "version" = "=0.31.1" }
lance-encoding = { "version" = "=0.31.1" }
# Note that this one does not include pyarrow
arrow = { version = "55.1", optional = false }
arrow-array = "55.1"

View File

@@ -47,10 +47,10 @@ def extract_features(line: str) -> list:
"""
import re
match = re.search(r'"features"\s*=\s*\[\s*(.*?)\s*\]', line, re.DOTALL)
match = re.search(r'"features"\s*=\s*\[(.*?)\]', line)
if match:
features_str = match.group(1)
return [f.strip('"') for f in features_str.split(",") if len(f) > 0]
return [f.strip('"') for f in features_str.split(",")]
return []
@@ -63,24 +63,10 @@ def update_cargo_toml(line_updater):
lines = f.readlines()
new_lines = []
lance_line = ""
is_parsing_lance_line = False
for line in lines:
if line.startswith("lance"):
# Update the line using the provided function
if line.strip().endswith("}"):
new_lines.append(line_updater(line))
else:
lance_line = line
is_parsing_lance_line = True
elif is_parsing_lance_line:
lance_line += line
if line.strip().endswith("}"):
new_lines.append(line_updater(lance_line))
lance_line = ""
is_parsing_lance_line = False
else:
print("doesn't end with }:", line)
new_lines.append(line_updater(line))
else:
# Keep the line unchanged
new_lines.append(line)

View File

@@ -8,7 +8,7 @@
<parent>
<groupId>com.lancedb</groupId>
<artifactId>lancedb-parent</artifactId>
<version>0.21.2-beta.0</version>
<version>0.21.1-final.0</version>
<relativePath>../pom.xml</relativePath>
</parent>

View File

@@ -6,7 +6,7 @@
<groupId>com.lancedb</groupId>
<artifactId>lancedb-parent</artifactId>
<version>0.21.2-beta.0</version>
<version>0.21.1-final.0</version>
<packaging>pom</packaging>
<name>LanceDB Parent</name>

49
node/package-lock.json generated
View File

@@ -1,12 +1,12 @@
{
"name": "vectordb",
"version": "0.21.1",
"version": "0.21.1-beta.1",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "vectordb",
"version": "0.21.1",
"version": "0.21.1-beta.1",
"cpu": [
"x64",
"arm64"
@@ -52,11 +52,11 @@
"uuid": "^9.0.0"
},
"optionalDependencies": {
"@lancedb/vectordb-darwin-arm64": "0.21.1",
"@lancedb/vectordb-darwin-x64": "0.21.1",
"@lancedb/vectordb-linux-arm64-gnu": "0.21.1",
"@lancedb/vectordb-linux-x64-gnu": "0.21.1",
"@lancedb/vectordb-win32-x64-msvc": "0.21.1"
"@lancedb/vectordb-darwin-arm64": "0.21.1-beta.1",
"@lancedb/vectordb-darwin-x64": "0.21.1-beta.1",
"@lancedb/vectordb-linux-arm64-gnu": "0.21.1-beta.1",
"@lancedb/vectordb-linux-x64-gnu": "0.21.1-beta.1",
"@lancedb/vectordb-win32-x64-msvc": "0.21.1-beta.1"
},
"peerDependencies": {
"@apache-arrow/ts": "^14.0.2",
@@ -327,65 +327,60 @@
}
},
"node_modules/@lancedb/vectordb-darwin-arm64": {
"version": "0.21.1",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.21.1.tgz",
"integrity": "sha512-eXeOKgK5s7MSKDzA7Hl4/9E2X8tWWMNV7UJiFdwxrUcop86tM5ePBi8tApRnaQ3wBXrs99XTVBJ7+j+2gzilVA==",
"version": "0.21.1-beta.1",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.21.1-beta.1.tgz",
"integrity": "sha512-D9SOLFb/40E2/9bt82xOti3jogRAaR1UkT2LfGZJw/0wBu8d8/xKjWgfm3d26S5K6in6DWsX1njLxevrFqD5HA==",
"cpu": [
"arm64"
],
"license": "Apache-2.0",
"optional": true,
"os": [
"darwin"
]
},
"node_modules/@lancedb/vectordb-darwin-x64": {
"version": "0.21.1",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.21.1.tgz",
"integrity": "sha512-vLoPWfg7OPw5vazLH5/YD/yQkZiTiPniuQgsH+xTodRfLf926lny53G7LQ6nFXNKIzX/jYKtg7AfMU8IcDLSEQ==",
"version": "0.21.1-beta.1",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.21.1-beta.1.tgz",
"integrity": "sha512-JnZ41aDOJs6LWfI9t/+MnpqsK/Fj9r/hDdZSOjcQquLOcm2eP3NnvEnDvn+1pqWBN6ceqf1avTatPBGnD/yhNA==",
"cpu": [
"x64"
],
"license": "Apache-2.0",
"optional": true,
"os": [
"darwin"
]
},
"node_modules/@lancedb/vectordb-linux-arm64-gnu": {
"version": "0.21.1",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.21.1.tgz",
"integrity": "sha512-IMAxtXj5aHCv9peziN77IxQpkYFj83KvI8zQCHzbMMXv7BspkhAd0PaUViqHqtTf2TUHjYQ66a7clZrEn+xQuQ==",
"version": "0.21.1-beta.1",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.21.1-beta.1.tgz",
"integrity": "sha512-Xnw0wYtnfzVUr4DzppJCSx+HZdAHr6sqMC8SdaYNQ9XEjBZE20n5SO2AdBYjejbmONJ7lpGs3ydnLIZ6N40dAQ==",
"cpu": [
"arm64"
],
"license": "Apache-2.0",
"optional": true,
"os": [
"linux"
]
},
"node_modules/@lancedb/vectordb-linux-x64-gnu": {
"version": "0.21.1",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.21.1.tgz",
"integrity": "sha512-9oPOxBsYGngIhtC/oC+fQ9V0w9mgFuj2Wyler8f5UYQdiAutsTNyOUA+XjtcROjVZrZ5oUeIrvOQSte9BbpRTg==",
"version": "0.21.1-beta.1",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.21.1-beta.1.tgz",
"integrity": "sha512-7S7gV13hv9Ho5W1Jat3FYiaMJOjRAwZOol7lKvOhU+sR/tJMEfZIOWAgymoqhAowbMtf+wwLoeKacfybXGET/w==",
"cpu": [
"x64"
],
"license": "Apache-2.0",
"optional": true,
"os": [
"linux"
]
},
"node_modules/@lancedb/vectordb-win32-x64-msvc": {
"version": "0.21.1",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.21.1.tgz",
"integrity": "sha512-XqDXFLfdjNpDZ5jaqLerdx+sDU4YLuPK3VF4TowwcOlWDrUtI/L1lAyCaKxcyz1qE3VGuZvhNU89N5ioEICb4Q==",
"version": "0.21.1-beta.1",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.21.1-beta.1.tgz",
"integrity": "sha512-w6fEQA9IquvJ/GUYfiawRQvvdFD6OU44UW9JWm+FoscUFzdLiV7qmH4QjYEeEXQD7ob83ikFaxXGPTksYXpNOA==",
"cpu": [
"x64"
],
"license": "Apache-2.0",
"optional": true,
"os": [
"win32"

View File

@@ -1,6 +1,6 @@
{
"name": "vectordb",
"version": "0.21.2-beta.0",
"version": "0.21.1",
"description": " Serverless, low-latency vector database for AI applications",
"private": false,
"main": "dist/index.js",
@@ -89,10 +89,10 @@
}
},
"optionalDependencies": {
"@lancedb/vectordb-darwin-x64": "0.21.2-beta.0",
"@lancedb/vectordb-darwin-arm64": "0.21.2-beta.0",
"@lancedb/vectordb-linux-x64-gnu": "0.21.2-beta.0",
"@lancedb/vectordb-linux-arm64-gnu": "0.21.2-beta.0",
"@lancedb/vectordb-win32-x64-msvc": "0.21.2-beta.0"
"@lancedb/vectordb-darwin-x64": "0.21.1",
"@lancedb/vectordb-darwin-arm64": "0.21.1",
"@lancedb/vectordb-linux-x64-gnu": "0.21.1",
"@lancedb/vectordb-linux-arm64-gnu": "0.21.1",
"@lancedb/vectordb-win32-x64-msvc": "0.21.1"
}
}

View File

@@ -1,7 +1,7 @@
[package]
name = "lancedb-nodejs"
edition.workspace = true
version = "0.21.2-beta.0"
version = "0.21.1"
license.workspace = true
description.workspace = true
repository.workspace = true

View File

@@ -1706,60 +1706,6 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
expect(mustNotResults.length).toBe(1);
});
test("full text search ngram", async () => {
const db = await connect(tmpDir.name);
const data = [
{ text: "hello world", vector: [0.1, 0.2, 0.3] },
{ text: "lance database", vector: [0.4, 0.5, 0.6] },
{ text: "lance is cool", vector: [0.7, 0.8, 0.9] },
];
const table = await db.createTable("test", data);
await table.createIndex("text", {
config: Index.fts({ baseTokenizer: "ngram" }),
});
const results = await table.search("lan").toArray();
expect(results.length).toBe(2);
const resultSet = new Set(results.map((r) => r.text));
expect(resultSet.has("lance database")).toBe(true);
expect(resultSet.has("lance is cool")).toBe(true);
const results2 = await table.search("nce").toArray(); // spellchecker:disable-line
expect(results2.length).toBe(2);
const resultSet2 = new Set(results2.map((r) => r.text));
expect(resultSet2.has("lance database")).toBe(true);
expect(resultSet2.has("lance is cool")).toBe(true);
// the default min_ngram_length is 3, so "la" should not match
const results3 = await table.search("la").toArray();
expect(results3.length).toBe(0);
// test setting min_ngram_length and prefix_only
await table.createIndex("text", {
config: Index.fts({
baseTokenizer: "ngram",
ngramMinLength: 2,
prefixOnly: true,
}),
replace: true,
});
const results4 = await table.search("lan").toArray();
expect(results4.length).toBe(2);
const resultSet4 = new Set(results4.map((r) => r.text));
expect(resultSet4.has("lance database")).toBe(true);
expect(resultSet4.has("lance is cool")).toBe(true);
const results5 = await table.search("nce").toArray(); // spellchecker:disable-line
expect(results5.length).toBe(0);
const results6 = await table.search("la").toArray();
expect(results6.length).toBe(2);
const resultSet6 = new Set(results6.map((r) => r.text));
expect(resultSet6.has("lance database")).toBe(true);
expect(resultSet6.has("lance is cool")).toBe(true);
});
test.each([
[0.4, 0.5, 0.599], // number[]
Float32Array.of(0.4, 0.5, 0.599), // Float32Array

View File

@@ -439,7 +439,7 @@ export interface FtsOptions {
*
* "raw" - Raw tokenizer. This tokenizer does not split the text into tokens and indexes the entire text as a single token.
*/
baseTokenizer?: "simple" | "whitespace" | "raw" | "ngram";
baseTokenizer?: "simple" | "whitespace" | "raw";
/**
* language for stemming and stop words
@@ -472,21 +472,6 @@ export interface FtsOptions {
* whether to remove punctuation
*/
asciiFolding?: boolean;
/**
* ngram min length
*/
ngramMinLength?: number;
/**
* ngram max length
*/
ngramMaxLength?: number;
/**
* whether to only index the prefix of the token for ngram tokenizer
*/
prefixOnly?: boolean;
}
export class Index {
@@ -623,9 +608,6 @@ export class Index {
options?.stem,
options?.removeStopWords,
options?.asciiFolding,
options?.ngramMinLength,
options?.ngramMaxLength,
options?.prefixOnly,
),
);
}

View File

@@ -75,10 +75,10 @@ export interface OptimizeOptions {
* // Delete all versions older than 1 day
* const olderThan = new Date();
* olderThan.setDate(olderThan.getDate() - 1));
* tbl.optimize({cleanupOlderThan: olderThan});
* tbl.cleanupOlderVersions(olderThan);
*
* // Delete all versions except the current version
* tbl.optimize({cleanupOlderThan: new Date()});
* tbl.cleanupOlderVersions(new Date());
*/
cleanupOlderThan: Date;
deleteUnverified: boolean;

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-darwin-arm64",
"version": "0.21.2-beta.0",
"version": "0.21.1",
"os": ["darwin"],
"cpu": ["arm64"],
"main": "lancedb.darwin-arm64.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-darwin-x64",
"version": "0.21.2-beta.0",
"version": "0.21.1",
"os": ["darwin"],
"cpu": ["x64"],
"main": "lancedb.darwin-x64.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-arm64-gnu",
"version": "0.21.2-beta.0",
"version": "0.21.1",
"os": ["linux"],
"cpu": ["arm64"],
"main": "lancedb.linux-arm64-gnu.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-arm64-musl",
"version": "0.21.2-beta.0",
"version": "0.21.1",
"os": ["linux"],
"cpu": ["arm64"],
"main": "lancedb.linux-arm64-musl.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-x64-gnu",
"version": "0.21.2-beta.0",
"version": "0.21.1",
"os": ["linux"],
"cpu": ["x64"],
"main": "lancedb.linux-x64-gnu.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-x64-musl",
"version": "0.21.2-beta.0",
"version": "0.21.1",
"os": ["linux"],
"cpu": ["x64"],
"main": "lancedb.linux-x64-musl.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-win32-arm64-msvc",
"version": "0.21.2-beta.0",
"version": "0.21.1",
"os": [
"win32"
],

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-win32-x64-msvc",
"version": "0.21.2-beta.0",
"version": "0.21.1",
"os": ["win32"],
"cpu": ["x64"],
"main": "lancedb.win32-x64-msvc.node",

View File

@@ -1,12 +1,12 @@
{
"name": "@lancedb/lancedb",
"version": "0.21.1",
"version": "0.21.1-beta.1",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "@lancedb/lancedb",
"version": "0.21.1",
"version": "0.21.1-beta.1",
"cpu": [
"x64",
"arm64"

View File

@@ -11,7 +11,7 @@
"ann"
],
"private": false,
"version": "0.21.2-beta.0",
"version": "0.21.1",
"main": "dist/index.js",
"exports": {
".": "./dist/index.js",

View File

@@ -123,9 +123,6 @@ impl Index {
stem: Option<bool>,
remove_stop_words: Option<bool>,
ascii_folding: Option<bool>,
ngram_min_length: Option<u32>,
ngram_max_length: Option<u32>,
prefix_only: Option<bool>,
) -> Self {
let mut opts = FtsIndexBuilder::default();
if let Some(with_position) = with_position {
@@ -152,15 +149,6 @@ impl Index {
if let Some(ascii_folding) = ascii_folding {
opts = opts.ascii_folding(ascii_folding);
}
if let Some(ngram_min_length) = ngram_min_length {
opts = opts.ngram_min_length(ngram_min_length);
}
if let Some(ngram_max_length) = ngram_max_length {
opts = opts.ngram_max_length(ngram_max_length);
}
if let Some(prefix_only) = prefix_only {
opts = opts.ngram_prefix_only(prefix_only);
}
Self {
inner: Mutex::new(Some(LanceDbIndex::FTS(opts))),

View File

@@ -1,5 +1,5 @@
[tool.bumpversion]
current_version = "0.24.2-beta.0"
current_version = "0.24.1"
parse = """(?x)
(?P<major>0|[1-9]\\d*)\\.
(?P<minor>0|[1-9]\\d*)\\.

View File

@@ -1,6 +1,6 @@
[package]
name = "lancedb-python"
version = "0.24.2-beta.0"
version = "0.24.1"
edition.workspace = true
description = "Python bindings for LanceDB"
license.workspace = true

View File

@@ -137,9 +137,6 @@ class FTS:
stem: bool = True
remove_stop_words: bool = True
ascii_folding: bool = True
ngram_min_length: int = 3
ngram_max_length: int = 3
prefix_only: bool = False
@dataclass

View File

@@ -1374,8 +1374,6 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
if query_string is not None and not isinstance(query_string, str):
raise ValueError("Reranking currently only supports string queries")
self._str_query = query_string if query_string is not None else self._str_query
if reranker.score == "all":
self.with_row_id(True)
return self
def bypass_vector_index(self) -> LanceVectorQueryBuilder:
@@ -1571,8 +1569,6 @@ class LanceFtsQueryBuilder(LanceQueryBuilder):
The LanceQueryBuilder object.
"""
self._reranker = reranker
if reranker.score == "all":
self.with_row_id(True)
return self
@@ -1849,8 +1845,6 @@ class LanceHybridQueryBuilder(LanceQueryBuilder):
self._norm = normalize
self._reranker = reranker
if reranker.score == "all":
self.with_row_id(True)
return self

View File

@@ -158,9 +158,6 @@ class RemoteTable(Table):
stem: bool = True,
remove_stop_words: bool = True,
ascii_folding: bool = True,
ngram_min_length: int = 3,
ngram_max_length: int = 3,
prefix_only: bool = False,
):
config = FTS(
with_position=with_position,
@@ -171,9 +168,6 @@ class RemoteTable(Table):
stem=stem,
remove_stop_words=remove_stop_words,
ascii_folding=ascii_folding,
ngram_min_length=ngram_min_length,
ngram_max_length=ngram_max_length,
prefix_only=prefix_only,
)
LOOP.run(
self._table.create_index(

View File

@@ -74,7 +74,9 @@ class AnswerdotaiRerankers(Reranker):
if self.score == "relevance":
combined_results = self._keep_relevance_score(combined_results)
elif self.score == "all":
combined_results = self._merge_and_keep_scores(vector_results, fts_results)
raise NotImplementedError(
"Answerdotai Reranker does not support score='all' yet"
)
combined_results = combined_results.sort_by(
[("_relevance_score", "descending")]
)

View File

@@ -232,39 +232,6 @@ class Reranker(ABC):
return deduped_table
def _merge_and_keep_scores(self, vector_results: pa.Table, fts_results: pa.Table):
"""
Merge the results from the vector and FTS search and keep the scores.
This op is slower than just keeping relevance score but can be useful
for debugging.
"""
# add nulls to fts results for _distance
if "_distance" not in fts_results.column_names:
fts_results = fts_results.append_column(
"_distance",
pa.array([None] * len(fts_results), type=pa.float32()),
)
# add nulls to vector results for _score
if "_score" not in vector_results.column_names:
vector_results = vector_results.append_column(
"_score",
pa.array([None] * len(vector_results), type=pa.float32()),
)
# combine them and fill the scores
vector_results_dict = {row["_rowid"]: row for row in vector_results.to_pylist()}
fts_results_dict = {row["_rowid"]: row for row in fts_results.to_pylist()}
# merge them into vector_results
for key, value in fts_results_dict.items():
if key in vector_results_dict:
vector_results_dict[key]["_score"] = value["_score"]
else:
vector_results_dict[key] = value
combined = pa.Table.from_pylist(list(vector_results_dict.values()))
return combined
def _keep_relevance_score(self, combined_results: pa.Table):
if self.score == "relevance":
if "_score" in combined_results.column_names:

View File

@@ -92,14 +92,14 @@ class CohereReranker(Reranker):
vector_results: pa.Table,
fts_results: pa.Table,
):
if self.score == "all":
combined_results = self._merge_and_keep_scores(vector_results, fts_results)
else:
combined_results = self.merge_results(vector_results, fts_results)
combined_results = self.merge_results(vector_results, fts_results)
combined_results = self._rerank(combined_results, query)
if self.score == "relevance":
combined_results = self._keep_relevance_score(combined_results)
elif self.score == "all":
raise NotImplementedError(
"return_score='all' not implemented for cohere reranker"
)
return combined_results
def rerank_vector(self, query: str, vector_results: pa.Table):

View File

@@ -81,15 +81,15 @@ class CrossEncoderReranker(Reranker):
vector_results: pa.Table,
fts_results: pa.Table,
):
if self.score == "all":
combined_results = self._merge_and_keep_scores(vector_results, fts_results)
else:
combined_results = self.merge_results(vector_results, fts_results)
combined_results = self.merge_results(vector_results, fts_results)
combined_results = self._rerank(combined_results, query)
# sort the results by _score
if self.score == "relevance":
combined_results = self._keep_relevance_score(combined_results)
elif self.score == "all":
raise NotImplementedError(
"return_score='all' not implemented for CrossEncoderReranker"
)
combined_results = combined_results.sort_by(
[("_relevance_score", "descending")]
)

View File

@@ -97,14 +97,14 @@ class JinaReranker(Reranker):
vector_results: pa.Table,
fts_results: pa.Table,
):
if self.score == "all":
combined_results = self._merge_and_keep_scores(vector_results, fts_results)
else:
combined_results = self.merge_results(vector_results, fts_results)
combined_results = self.merge_results(vector_results, fts_results)
combined_results = self._rerank(combined_results, query)
if self.score == "relevance":
combined_results = self._keep_relevance_score(combined_results)
elif self.score == "all":
raise NotImplementedError(
"return_score='all' not implemented for JinaReranker"
)
return combined_results
def rerank_vector(self, query: str, vector_results: pa.Table):

View File

@@ -88,13 +88,14 @@ class OpenaiReranker(Reranker):
vector_results: pa.Table,
fts_results: pa.Table,
):
if self.score == "all":
combined_results = self._merge_and_keep_scores(vector_results, fts_results)
else:
combined_results = self.merge_results(vector_results, fts_results)
combined_results = self.merge_results(vector_results, fts_results)
combined_results = self._rerank(combined_results, query)
if self.score == "relevance":
combined_results = self._keep_relevance_score(combined_results)
elif self.score == "all":
raise NotImplementedError(
"OpenAI Reranker does not support score='all' yet"
)
combined_results = combined_results.sort_by(
[("_relevance_score", "descending")]

View File

@@ -94,14 +94,14 @@ class VoyageAIReranker(Reranker):
vector_results: pa.Table,
fts_results: pa.Table,
):
if self.score == "all":
combined_results = self._merge_and_keep_scores(vector_results, fts_results)
else:
combined_results = self.merge_results(vector_results, fts_results)
combined_results = self.merge_results(vector_results, fts_results)
combined_results = self._rerank(combined_results, query)
if self.score == "relevance":
combined_results = self._keep_relevance_score(combined_results)
elif self.score == "all":
raise NotImplementedError(
"return_score='all' not implemented for voyageai reranker"
)
return combined_results
def rerank_vector(self, query: str, vector_results: pa.Table):

View File

@@ -838,9 +838,6 @@ class Table(ABC):
stem: bool = True,
remove_stop_words: bool = True,
ascii_folding: bool = True,
ngram_min_length: int = 3,
ngram_max_length: int = 3,
prefix_only: bool = False,
wait_timeout: Optional[timedelta] = None,
):
"""Create a full-text search index on the table.
@@ -880,7 +877,6 @@ class Table(ABC):
- "simple": Splits text by whitespace and punctuation.
- "whitespace": Split text by whitespace, but not punctuation.
- "raw": No tokenization. The entire text is treated as a single token.
- "ngram": N-Gram tokenizer.
language : str, default "English"
The language to use for tokenization.
max_token_length : int, default 40
@@ -898,12 +894,6 @@ class Table(ABC):
ascii_folding : bool, default True
Whether to fold ASCII characters. This converts accented characters to
their ASCII equivalent. For example, "café" would be converted to "cafe".
ngram_min_length: int, default 3
The minimum length of an n-gram.
ngram_max_length: int, default 3
The maximum length of an n-gram.
prefix_only: bool, default False
Whether to only index the prefix of the token for ngram tokenizer.
wait_timeout: timedelta, optional
The timeout to wait if indexing is asynchronous.
"""
@@ -1991,9 +1981,6 @@ class LanceTable(Table):
stem: bool = True,
remove_stop_words: bool = True,
ascii_folding: bool = True,
ngram_min_length: int = 3,
ngram_max_length: int = 3,
prefix_only: bool = False,
):
if not use_tantivy:
if not isinstance(field_names, str):
@@ -2009,9 +1996,6 @@ class LanceTable(Table):
"stem": stem,
"remove_stop_words": remove_stop_words,
"ascii_folding": ascii_folding,
"ngram_min_length": ngram_min_length,
"ngram_max_length": ngram_max_length,
"prefix_only": prefix_only,
}
else:
tokenizer_configs = self.infer_tokenizer_configs(tokenizer_name)
@@ -2081,9 +2065,6 @@ class LanceTable(Table):
"stem": False,
"remove_stop_words": False,
"ascii_folding": False,
"ngram_min_length": 3,
"ngram_max_length": 3,
"prefix_only": False,
}
elif tokenizer_name == "raw":
return {
@@ -2094,9 +2075,6 @@ class LanceTable(Table):
"stem": False,
"remove_stop_words": False,
"ascii_folding": False,
"ngram_min_length": 3,
"ngram_max_length": 3,
"prefix_only": False,
}
elif tokenizer_name == "whitespace":
return {
@@ -2107,9 +2085,6 @@ class LanceTable(Table):
"stem": False,
"remove_stop_words": False,
"ascii_folding": False,
"ngram_min_length": 3,
"ngram_max_length": 3,
"prefix_only": False,
}
# or it's with language stemming with pattern like "en_stem"
@@ -2128,9 +2103,6 @@ class LanceTable(Table):
"stem": True,
"remove_stop_words": False,
"ascii_folding": False,
"ngram_min_length": 3,
"ngram_max_length": 3,
"prefix_only": False,
}
def add(

View File

@@ -25,4 +25,4 @@ IndexType = Literal[
]
# Tokenizer literals
BaseTokenizerType = Literal["simple", "raw", "whitespace", "ngram"]
BaseTokenizerType = Literal["simple", "raw", "whitespace"]

View File

@@ -669,46 +669,3 @@ def test_fts_on_list(mem_db: DBConnection):
res = table.search(PhraseQuery("lance database", "text")).limit(5).to_list()
assert len(res) == 2
def test_fts_ngram(mem_db: DBConnection):
data = pa.table({"text": ["hello world", "lance database", "lance is cool"]})
table = mem_db.create_table("test", data=data)
table.create_fts_index("text", use_tantivy=False, base_tokenizer="ngram")
results = table.search("lan", query_type="fts").limit(10).to_list()
assert len(results) == 2
assert set(r["text"] for r in results) == {"lance database", "lance is cool"}
results = (
table.search("nce", query_type="fts").limit(10).to_list()
) # spellchecker:disable-line
assert len(results) == 2
assert set(r["text"] for r in results) == {"lance database", "lance is cool"}
# the default min_ngram_length is 3, so "la" should not match
results = table.search("la", query_type="fts").limit(10).to_list()
assert len(results) == 0
# test setting min_ngram_length and prefix_only
table.create_fts_index(
"text",
use_tantivy=False,
base_tokenizer="ngram",
replace=True,
ngram_min_length=2,
prefix_only=True,
)
results = table.search("lan", query_type="fts").limit(10).to_list()
assert len(results) == 2
assert set(r["text"] for r in results) == {"lance database", "lance is cool"}
results = (
table.search("nce", query_type="fts").limit(10).to_list()
) # spellchecker:disable-line
assert len(results) == 0
results = table.search("la", query_type="fts").limit(10).to_list()
assert len(results) == 2
assert set(r["text"] for r in results) == {"lance database", "lance is cool"}

View File

@@ -272,9 +272,7 @@ async def test_distance_range_with_new_rows_async():
# append more rows so that execution plan would be mixed with ANN & Flat KNN
new_data = pa.table(
{
"vector": pa.FixedShapeTensorArray.from_numpy_ndarray(
np.random.rand(4, 2) + 1
),
"vector": pa.FixedShapeTensorArray.from_numpy_ndarray(np.random.rand(4, 2)),
}
)
await table.add(new_data)

View File

@@ -499,19 +499,3 @@ def test_empty_result_reranker():
.rerank(reranker)
.to_arrow()
)
@pytest.mark.parametrize("use_tantivy", [True, False])
def test_cross_encoder_reranker_return_all(tmp_path, use_tantivy):
    """A hybrid search reranked with return_score="all" exposes every score column."""
    pytest.importorskip("sentence_transformers")
    reranker = CrossEncoderReranker(return_score="all")
    table, _schema = get_test_table(tmp_path, use_tantivy)

    search = table.search(
        "single player experience",
        query_type="hybrid",
        vector_column_name="vector",
    )
    result = search.rerank(reranker=reranker).to_arrow()

    # the reranker score plus the original FTS and vector scores must all be present
    for column in ("_relevance_score", "_score", "_distance"):
        assert column in result.column_names

View File

@@ -47,10 +47,7 @@ pub fn extract_index_params(source: &Option<Bound<'_, PyAny>>) -> PyResult<Lance
.max_token_length(params.max_token_length)
.remove_stop_words(params.remove_stop_words)
.stem(params.stem)
.ascii_folding(params.ascii_folding)
.ngram_min_length(params.ngram_min_length)
.ngram_max_length(params.ngram_max_length)
.ngram_prefix_only(params.prefix_only);
.ascii_folding(params.ascii_folding);
Ok(LanceDbIndex::FTS(inner_opts))
},
"IvfFlat" => {
@@ -133,9 +130,6 @@ struct FtsParams {
stem: bool,
remove_stop_words: bool,
ascii_folding: bool,
ngram_min_length: u32,
ngram_max_length: u32,
prefix_only: bool,
}
#[derive(FromPyObject)]

View File

@@ -1,6 +1,6 @@
[package]
name = "lancedb-node"
version = "0.21.2-beta.0"
version = "0.21.1"
description = "Serverless, low-latency vector database for AI applications"
license.workspace = true
edition.workspace = true

View File

@@ -1,6 +1,6 @@
[package]
name = "lancedb"
version = "0.21.2-beta.0"
version = "0.21.1"
edition.workspace = true
description = "LanceDB: A serverless, low-latency vector database for AI applications"
license.workspace = true

View File

@@ -8,7 +8,7 @@ use std::path::Path;
use std::{collections::HashMap, sync::Arc};
use lance::dataset::{ReadParams, WriteMode};
use lance::io::{ObjectStore, ObjectStoreParams, WrappingObjectStore};
use lance::io::{ObjectStore, ObjectStoreParams, ObjectStoreRegistry, WrappingObjectStore};
use lance_datafusion::utils::StreamingWriteSource;
use lance_encoding::version::LanceFileVersion;
use lance_table::io::commit::commit_handler_from_url;
@@ -217,9 +217,6 @@ pub struct ListingDatabase {
// Options for tables created by this connection
new_table_config: NewTableConfig,
// Session for object stores and caching
session: Arc<lance::session::Session>,
}
impl std::fmt::Display for ListingDatabase {
@@ -316,17 +313,13 @@ impl ListingDatabase {
let plain_uri = url.to_string();
let session = Arc::new(lance::session::Session::default());
let registry = Arc::new(ObjectStoreRegistry::default());
let os_params = ObjectStoreParams {
storage_options: Some(options.storage_options.clone()),
..Default::default()
};
let (object_store, base_path) = ObjectStore::from_uri_and_params(
session.store_registry(),
&plain_uri,
&os_params,
)
.await?;
let (object_store, base_path) =
ObjectStore::from_uri_and_params(registry, &plain_uri, &os_params).await?;
if object_store.is_local() {
Self::try_create_dir(&plain_uri).context(CreateDirSnafu { path: plain_uri })?;
}
@@ -349,7 +342,6 @@ impl ListingDatabase {
read_consistency_interval: request.read_consistency_interval,
storage_options: options.storage_options,
new_table_config: options.new_table_config,
session,
})
}
Err(_) => {
@@ -368,13 +360,7 @@ impl ListingDatabase {
read_consistency_interval: Option<std::time::Duration>,
new_table_config: NewTableConfig,
) -> Result<Self> {
let session = Arc::new(lance::session::Session::default());
let (object_store, base_path) = ObjectStore::from_uri_and_params(
session.store_registry(),
path,
&ObjectStoreParams::default(),
)
.await?;
let (object_store, base_path) = ObjectStore::from_uri(path).await?;
if object_store.is_local() {
Self::try_create_dir(path).context(CreateDirSnafu { path })?;
}
@@ -388,7 +374,6 @@ impl ListingDatabase {
read_consistency_interval,
storage_options: HashMap::new(),
new_table_config,
session,
})
}
@@ -456,128 +441,6 @@ impl ListingDatabase {
}
Ok(())
}
/// Inherit storage options from the connection into the target map
fn inherit_storage_options(&self, target: &mut HashMap<String, String>) {
for (key, value) in self.storage_options.iter() {
if !target.contains_key(key) {
target.insert(key.clone(), value.clone());
}
}
}
/// Extract storage option overrides from the request
fn extract_storage_overrides(
&self,
request: &CreateTableRequest,
) -> Result<(Option<LanceFileVersion>, Option<bool>)> {
let storage_options = request
.write_options
.lance_write_params
.as_ref()
.and_then(|p| p.store_params.as_ref())
.and_then(|sp| sp.storage_options.as_ref());
let storage_version_override = storage_options
.and_then(|opts| opts.get(OPT_NEW_TABLE_STORAGE_VERSION))
.map(|s| s.parse::<LanceFileVersion>())
.transpose()?;
let v2_manifest_override = storage_options
.and_then(|opts| opts.get(OPT_NEW_TABLE_V2_MANIFEST_PATHS))
.map(|s| s.parse::<bool>())
.transpose()
.map_err(|_| Error::InvalidInput {
message: "enable_v2_manifest_paths must be a boolean".to_string(),
})?;
Ok((storage_version_override, v2_manifest_override))
}
/// Build the Lance write parameters for a table-creation request.
///
/// Starts from the request's `lance_write_params` (or the default when the
/// request has none) and then applies, in order:
///
/// 1. the connection's storage options, for keys the request did not set;
/// 2. the data storage version;
/// 3. the v2 manifest paths flag;
/// 4. `WriteMode::Overwrite` when the request mode is `Overwrite`;
/// 5. this connection's session.
///
/// For the storage version and the v2 manifest paths flag, the connection's
/// `new_table_config` takes precedence over the override parsed from storage
/// options. When neither supplies a value, whatever the caller already put
/// on the write params is kept.
fn prepare_write_params(
    &self,
    request: &CreateTableRequest,
    storage_version_override: Option<LanceFileVersion>,
    v2_manifest_override: Option<bool>,
) -> lance::dataset::WriteParams {
    let mut write_params = request
        .write_options
        .lance_write_params
        .clone()
        .unwrap_or_default();

    // Only modify the storage options if we actually have something to
    // inherit. There is a difference between storage_options=None and
    // storage_options=Some({}). Using storage_options=None will cause the
    // connection's session store registry to be used. Supplying Some({})
    // will cause a new connection to be created, and that connection will
    // be dropped from the cache when python GCs the table object, which
    // confounds reuse across tables.
    if !self.storage_options.is_empty() {
        let storage_options = write_params
            .store_params
            .get_or_insert_with(Default::default)
            .storage_options
            .get_or_insert_with(Default::default);
        self.inherit_storage_options(storage_options);
    }

    // Assign only when a version was actually resolved. An unconditional
    // assignment would reset a version the caller set on the write params
    // back to `None` whenever neither source provides one.
    if let Some(data_storage_version) = self
        .new_table_config
        .data_storage_version
        .or(storage_version_override)
    {
        write_params.data_storage_version = Some(data_storage_version);
    }

    if let Some(enable_v2_manifest_paths) = self
        .new_table_config
        .enable_v2_manifest_paths
        .or(v2_manifest_override)
    {
        write_params.enable_v2_manifest_paths = enable_v2_manifest_paths;
    }

    if matches!(&request.mode, CreateTableMode::Overwrite) {
        write_params.mode = WriteMode::Overwrite;
    }

    // Share the connection's session with the write.
    write_params.session = Some(self.session.clone());

    write_params
}
/// Handle the case where table already exists based on the create mode
async fn handle_table_exists(
&self,
table_name: &str,
mode: CreateTableMode,
data_schema: &arrow_schema::Schema,
) -> Result<Arc<dyn BaseTable>> {
match mode {
CreateTableMode::Create => Err(Error::TableAlreadyExists {
name: table_name.to_string(),
}),
CreateTableMode::ExistOk(callback) => {
let req = OpenTableRequest {
name: table_name.to_string(),
index_cache_size: None,
lance_read_params: None,
};
let req = (callback)(req);
let table = self.open_table(req).await?;
let table_schema = table.schema().await?;
if table_schema.as_ref() != data_schema {
return Err(Error::Schema {
message: "Provided schema does not match existing table schema".to_string(),
});
}
Ok(table)
}
CreateTableMode::Overwrite => unreachable!(),
}
}
}
#[async_trait::async_trait]
@@ -612,14 +475,50 @@ impl Database for ListingDatabase {
Ok(f)
}
async fn create_table(&self, request: CreateTableRequest) -> Result<Arc<dyn BaseTable>> {
async fn create_table(&self, mut request: CreateTableRequest) -> Result<Arc<dyn BaseTable>> {
let table_uri = self.table_uri(&request.name)?;
// Inherit storage options from the connection
let storage_options = request
.write_options
.lance_write_params
.get_or_insert_with(Default::default)
.store_params
.get_or_insert_with(Default::default)
.storage_options
.get_or_insert_with(Default::default);
for (key, value) in self.storage_options.iter() {
if !storage_options.contains_key(key) {
storage_options.insert(key.clone(), value.clone());
}
}
let (storage_version_override, v2_manifest_override) =
self.extract_storage_overrides(&request)?;
let storage_options = storage_options.clone();
let write_params =
self.prepare_write_params(&request, storage_version_override, v2_manifest_override);
let mut write_params = request.write_options.lance_write_params.unwrap_or_default();
if let Some(storage_version) = &self.new_table_config.data_storage_version {
write_params.data_storage_version = Some(*storage_version);
} else {
// Allow the user to override the storage version via storage options (backwards compatibility)
if let Some(data_storage_version) = storage_options.get(OPT_NEW_TABLE_STORAGE_VERSION) {
write_params.data_storage_version = Some(data_storage_version.parse()?);
}
}
if let Some(enable_v2_manifest_paths) = self.new_table_config.enable_v2_manifest_paths {
write_params.enable_v2_manifest_paths = enable_v2_manifest_paths;
} else {
// Allow the user to enable v2 manifest paths via storage options (backwards compatibility)
if let Some(enable_v2_manifest_paths) = storage_options
.get(OPT_NEW_TABLE_V2_MANIFEST_PATHS)
.map(|s| s.parse::<bool>().unwrap())
{
write_params.enable_v2_manifest_paths = enable_v2_manifest_paths;
}
}
if matches!(&request.mode, CreateTableMode::Overwrite) {
write_params.mode = WriteMode::Overwrite;
}
let data_schema = request.data.arrow_schema();
@@ -634,10 +533,30 @@ impl Database for ListingDatabase {
.await
{
Ok(table) => Ok(Arc::new(table)),
Err(Error::TableAlreadyExists { .. }) => {
self.handle_table_exists(&request.name, request.mode, &data_schema)
.await
}
Err(Error::TableAlreadyExists { name }) => match request.mode {
CreateTableMode::Create => Err(Error::TableAlreadyExists { name }),
CreateTableMode::ExistOk(callback) => {
let req = OpenTableRequest {
name: request.name.clone(),
index_cache_size: None,
lance_read_params: None,
};
let req = (callback)(req);
let table = self.open_table(req).await?;
let table_schema = table.schema().await?;
if table_schema != data_schema {
return Err(Error::Schema {
message: "Provided schema does not match existing table schema"
.to_string(),
});
}
Ok(table)
}
CreateTableMode::Overwrite => unreachable!(),
},
Err(err) => Err(err),
}
}
@@ -645,22 +564,18 @@ impl Database for ListingDatabase {
async fn open_table(&self, mut request: OpenTableRequest) -> Result<Arc<dyn BaseTable>> {
let table_uri = self.table_uri(&request.name)?;
// Only modify the storage options if we actually have something to
// inherit. There is a difference between storage_options=None and
// storage_options=Some({}). Using storage_options=None will cause the
// connection's session store registry to be used. Supplying Some({})
// will cause a new connection to be created, and that connection will
// be dropped from the cache when python GCs the table object, which
// confounds reuse across tables.
if !self.storage_options.is_empty() {
let storage_options = request
.lance_read_params
.get_or_insert_with(Default::default)
.store_options
.get_or_insert_with(Default::default)
.storage_options
.get_or_insert_with(Default::default);
self.inherit_storage_options(storage_options);
// Inherit storage options from the connection
let storage_options = request
.lance_read_params
.get_or_insert_with(Default::default)
.store_options
.get_or_insert_with(Default::default)
.storage_options
.get_or_insert_with(Default::default);
for (key, value) in self.storage_options.iter() {
if !storage_options.contains_key(key) {
storage_options.insert(key.clone(), value.clone());
}
}
// Some ReadParams are exposed in the OpenTableBuilder, but we also
@@ -669,14 +584,13 @@ impl Database for ListingDatabase {
// If we have a user provided ReadParams use that
// If we don't then start with the default ReadParams and customize it with
// the options from the OpenTableBuilder
let mut read_params = request.lance_read_params.unwrap_or_else(|| {
let read_params = request.lance_read_params.unwrap_or_else(|| {
let mut default_params = ReadParams::default();
if let Some(index_cache_size) = request.index_cache_size {
default_params.index_cache_size = index_cache_size as usize;
}
default_params
});
read_params.session(self.session.clone());
let native_table = Arc::new(
NativeTable::open_with_params(

View File

@@ -281,46 +281,6 @@ async fn test_encryption() -> Result<()> {
Ok(())
}
#[tokio::test]
async fn test_table_storage_options_override() -> Result<()> {
    // Table-level storage options must take precedence over the
    // connection-level ones; tables without overrides inherit them.
    let bucket = S3Bucket::new("test-override").await;
    let connection_key = KMSKey::new().await;
    let table_key = KMSKey::new().await;

    let uri = format!("s3://{}", bucket.0);

    // Connection encrypts with `connection_key` by default.
    let db = lancedb::connect(&uri)
        .storage_options(CONFIG.iter().cloned())
        .storage_option("aws_server_side_encryption", "aws:kms")
        .storage_option("aws_sse_kms_key_id", &connection_key.0)
        .execute()
        .await?;

    // First table overrides the KMS key with `table_key`.
    let batch = test_data();
    let schema = batch.schema();
    let reader = RecordBatchIterator::new(vec![Ok(batch)], schema);
    let _overridden = db
        .create_table("test_override", reader)
        .storage_option("aws_sse_kms_key_id", &table_key.0)
        .execute()
        .await?;

    // Its objects must be encrypted with the table key, not the connection key.
    validate_objects_encrypted(&bucket.0, "test_override", &table_key.0).await;

    // Second table sets no override, so it falls back to the connection settings.
    let batch = test_data();
    let schema = batch.schema();
    let reader = RecordBatchIterator::new(vec![Ok(batch)], schema);
    let _inherited = db.create_table("test_inherit", reader).execute().await?;

    // Its objects must be encrypted with the connection key.
    validate_objects_encrypted(&bucket.0, "test_inherit", &connection_key.0).await;

    Ok(())
}
struct DynamoDBCommitTable(String);
impl DynamoDBCommitTable {