mirror of
https://github.com/lancedb/lancedb.git
synced 2025-12-23 13:29:57 +00:00
Compare commits
16 Commits
python-v0.
...
python-v0.
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c625b6f2b2 | ||
|
|
bec8fe6547 | ||
|
|
dc1150c011 | ||
|
|
afaefc6264 | ||
|
|
cb70ff8cee | ||
|
|
cbb5a841b1 | ||
|
|
c72f6770fd | ||
|
|
e5a80a5e86 | ||
|
|
8d0a7fad1f | ||
|
|
b80d4d0134 | ||
|
|
9645fe52c2 | ||
|
|
b77314168d | ||
|
|
e08d45e090 | ||
|
|
2e3ddb8382 | ||
|
|
627ca4c810 | ||
|
|
f8dae4ffe9 |
@@ -1,5 +1,5 @@
|
|||||||
[tool.bumpversion]
|
[tool.bumpversion]
|
||||||
current_version = "0.20.0"
|
current_version = "0.20.1-beta.2"
|
||||||
parse = """(?x)
|
parse = """(?x)
|
||||||
(?P<major>0|[1-9]\\d*)\\.
|
(?P<major>0|[1-9]\\d*)\\.
|
||||||
(?P<minor>0|[1-9]\\d*)\\.
|
(?P<minor>0|[1-9]\\d*)\\.
|
||||||
|
|||||||
10
.github/workflows/npm-publish.yml
vendored
10
.github/workflows/npm-publish.yml
vendored
@@ -541,10 +541,18 @@ jobs:
|
|||||||
run: npm deprecate vectordb "Use @lancedb/lancedb instead."
|
run: npm deprecate vectordb "Use @lancedb/lancedb instead."
|
||||||
- name: Checkout
|
- name: Checkout
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
|
with:
|
||||||
|
ref: main
|
||||||
- name: Update package-lock.json
|
- name: Update package-lock.json
|
||||||
run: bash ci/update_lockfiles.sh
|
run: |
|
||||||
|
git config user.name 'Lance Release'
|
||||||
|
git config user.email 'lance-dev@lancedb.com'
|
||||||
|
bash ci/update_lockfiles.sh
|
||||||
- name: Push new commit
|
- name: Push new commit
|
||||||
uses: ad-m/github-push-action@master
|
uses: ad-m/github-push-action@master
|
||||||
|
with:
|
||||||
|
github_token: ${{ secrets.LANCEDB_RELEASE_TOKEN }}
|
||||||
|
branch: main
|
||||||
- name: Notify Slack Action
|
- name: Notify Slack Action
|
||||||
uses: ravsamhq/notify-slack-action@2.3.0
|
uses: ravsamhq/notify-slack-action@2.3.0
|
||||||
if: ${{ always() }}
|
if: ${{ always() }}
|
||||||
|
|||||||
689
Cargo.lock
generated
689
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
16
Cargo.toml
16
Cargo.toml
@@ -21,14 +21,14 @@ categories = ["database-implementations"]
|
|||||||
rust-version = "1.78.0"
|
rust-version = "1.78.0"
|
||||||
|
|
||||||
[workspace.dependencies]
|
[workspace.dependencies]
|
||||||
lance = { "version" = "=0.29.1", "features" = ["dynamodb"], tag = "v0.29.1-beta.1", git="https://github.com/lancedb/lance.git" }
|
lance = { "version" = "=0.30.0", "features" = ["dynamodb"] }
|
||||||
lance-io = { version = "=0.29.1", tag = "v0.29.1-beta.1", git="https://github.com/lancedb/lance.git" }
|
lance-io = "=0.30.0"
|
||||||
lance-index = { version = "=0.29.1", tag = "v0.29.1-beta.1", git="https://github.com/lancedb/lance.git" }
|
lance-index = "=0.30.0"
|
||||||
lance-linalg = { version = "=0.29.1", tag = "v0.29.1-beta.1", git="https://github.com/lancedb/lance.git" }
|
lance-linalg = "=0.30.0"
|
||||||
lance-table = { version = "=0.29.1", tag = "v0.29.1-beta.1", git="https://github.com/lancedb/lance.git" }
|
lance-table = "=0.30.0"
|
||||||
lance-testing = { version = "=0.29.1", tag = "v0.29.1-beta.1", git="https://github.com/lancedb/lance.git" }
|
lance-testing = "=0.30.0"
|
||||||
lance-datafusion = { version = "=0.29.1", tag = "v0.29.1-beta.1", git="https://github.com/lancedb/lance.git" }
|
lance-datafusion = "=0.30.0"
|
||||||
lance-encoding = { version = "=0.29.1", tag = "v0.29.1-beta.1", git="https://github.com/lancedb/lance.git" }
|
lance-encoding = "=0.30.0"
|
||||||
# Note that this one does not include pyarrow
|
# Note that this one does not include pyarrow
|
||||||
arrow = { version = "55.1", optional = false }
|
arrow = { version = "55.1", optional = false }
|
||||||
arrow-array = "55.1"
|
arrow-array = "55.1"
|
||||||
|
|||||||
@@ -8,7 +8,7 @@
|
|||||||
<parent>
|
<parent>
|
||||||
<groupId>com.lancedb</groupId>
|
<groupId>com.lancedb</groupId>
|
||||||
<artifactId>lancedb-parent</artifactId>
|
<artifactId>lancedb-parent</artifactId>
|
||||||
<version>0.20.0-final.0</version>
|
<version>0.20.1-beta.2</version>
|
||||||
<relativePath>../pom.xml</relativePath>
|
<relativePath>../pom.xml</relativePath>
|
||||||
</parent>
|
</parent>
|
||||||
|
|
||||||
|
|||||||
@@ -6,7 +6,7 @@
|
|||||||
|
|
||||||
<groupId>com.lancedb</groupId>
|
<groupId>com.lancedb</groupId>
|
||||||
<artifactId>lancedb-parent</artifactId>
|
<artifactId>lancedb-parent</artifactId>
|
||||||
<version>0.20.0-final.0</version>
|
<version>0.20.1-beta.2</version>
|
||||||
<packaging>pom</packaging>
|
<packaging>pom</packaging>
|
||||||
|
|
||||||
<name>LanceDB Parent</name>
|
<name>LanceDB Parent</name>
|
||||||
|
|||||||
44
node/package-lock.json
generated
44
node/package-lock.json
generated
@@ -1,12 +1,12 @@
|
|||||||
{
|
{
|
||||||
"name": "vectordb",
|
"name": "vectordb",
|
||||||
"version": "0.20.0",
|
"version": "0.20.1-beta.2",
|
||||||
"lockfileVersion": 3,
|
"lockfileVersion": 3,
|
||||||
"requires": true,
|
"requires": true,
|
||||||
"packages": {
|
"packages": {
|
||||||
"": {
|
"": {
|
||||||
"name": "vectordb",
|
"name": "vectordb",
|
||||||
"version": "0.20.0",
|
"version": "0.20.1-beta.2",
|
||||||
"cpu": [
|
"cpu": [
|
||||||
"x64",
|
"x64",
|
||||||
"arm64"
|
"arm64"
|
||||||
@@ -52,11 +52,11 @@
|
|||||||
"uuid": "^9.0.0"
|
"uuid": "^9.0.0"
|
||||||
},
|
},
|
||||||
"optionalDependencies": {
|
"optionalDependencies": {
|
||||||
"@lancedb/vectordb-darwin-arm64": "0.20.0",
|
"@lancedb/vectordb-darwin-arm64": "0.20.1-beta.2",
|
||||||
"@lancedb/vectordb-darwin-x64": "0.20.0",
|
"@lancedb/vectordb-darwin-x64": "0.20.1-beta.2",
|
||||||
"@lancedb/vectordb-linux-arm64-gnu": "0.20.0",
|
"@lancedb/vectordb-linux-arm64-gnu": "0.20.1-beta.2",
|
||||||
"@lancedb/vectordb-linux-x64-gnu": "0.20.0",
|
"@lancedb/vectordb-linux-x64-gnu": "0.20.1-beta.2",
|
||||||
"@lancedb/vectordb-win32-x64-msvc": "0.20.0"
|
"@lancedb/vectordb-win32-x64-msvc": "0.20.1-beta.2"
|
||||||
},
|
},
|
||||||
"peerDependencies": {
|
"peerDependencies": {
|
||||||
"@apache-arrow/ts": "^14.0.2",
|
"@apache-arrow/ts": "^14.0.2",
|
||||||
@@ -327,9 +327,9 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/@lancedb/vectordb-darwin-arm64": {
|
"node_modules/@lancedb/vectordb-darwin-arm64": {
|
||||||
"version": "0.20.0",
|
"version": "0.20.1-beta.2",
|
||||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.20.0.tgz",
|
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.20.1-beta.2.tgz",
|
||||||
"integrity": "sha512-PEL4vFY42PaWPPnOfOcFBv1E+zumhZPMlQW7/M00ZA8O2uKiTc1xhajhaPcwVDZBYo36SRSIxUz2eYjXWA9sIw==",
|
"integrity": "sha512-mqi0yI+ZwBTydaDy1FRHAUZwrWS28u6tbHTe1s4uSrmERbVI6PfmoPR+NZWWAp6ZhlseSdl/+yeI4imk11rQSw==",
|
||||||
"cpu": [
|
"cpu": [
|
||||||
"arm64"
|
"arm64"
|
||||||
],
|
],
|
||||||
@@ -339,9 +339,9 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
"node_modules/@lancedb/vectordb-darwin-x64": {
|
"node_modules/@lancedb/vectordb-darwin-x64": {
|
||||||
"version": "0.20.0",
|
"version": "0.20.1-beta.2",
|
||||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.20.0.tgz",
|
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.20.1-beta.2.tgz",
|
||||||
"integrity": "sha512-4A1f9DiyGhziN9P81jSmMgzXSc1XXM9bIJw5q/b2NmDoiqIr8tYv1FKdm0JDhMYjtnzBeNpc67gVy3GlGCuUWA==",
|
"integrity": "sha512-m8EYYA8JZIeNsJqQsBDUMu6r31/u7FzpjonJ4Y+CjapVl6UdvI65KUkeL2dYrFao++RuIoaiqcm3e7gRgFZpXQ==",
|
||||||
"cpu": [
|
"cpu": [
|
||||||
"x64"
|
"x64"
|
||||||
],
|
],
|
||||||
@@ -351,9 +351,9 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
"node_modules/@lancedb/vectordb-linux-arm64-gnu": {
|
"node_modules/@lancedb/vectordb-linux-arm64-gnu": {
|
||||||
"version": "0.20.0",
|
"version": "0.20.1-beta.2",
|
||||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.20.0.tgz",
|
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.20.1-beta.2.tgz",
|
||||||
"integrity": "sha512-A3teZC/zU0tccluIJZsTasP8vBQWhXsmvLOo9UopSeyCrA1sR2vEyvXV9hMRJo7+9QjOrYFLiFWPjXEdFb+/1Q==",
|
"integrity": "sha512-3Og2+bk4GlWmMO1Yg2HBfeb5zrOMLaIHD7bEqQ4+6yw4IckAaV+ke05H0tyyqmOVrOQ0LpvtXgD7pPztjm9r9A==",
|
||||||
"cpu": [
|
"cpu": [
|
||||||
"arm64"
|
"arm64"
|
||||||
],
|
],
|
||||||
@@ -363,9 +363,9 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
"node_modules/@lancedb/vectordb-linux-x64-gnu": {
|
"node_modules/@lancedb/vectordb-linux-x64-gnu": {
|
||||||
"version": "0.20.0",
|
"version": "0.20.1-beta.2",
|
||||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.20.0.tgz",
|
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.20.1-beta.2.tgz",
|
||||||
"integrity": "sha512-uREL9YF5iaeyfYh+5uvkSLQquFXYQoJyuDMPMZTwOE/Zghgw3lRl6KHIoMVCOfw+S8tkeyzU8UR4zgrbymbPGg==",
|
"integrity": "sha512-mwTQyA/FBoU/FkPuvCNBZG3y83gBN+iYoejehBH2HBkLUIcmlsDgSRZ1OQ+f9ijj12EMBCA11tBUPA9zhHzyrw==",
|
||||||
"cpu": [
|
"cpu": [
|
||||||
"x64"
|
"x64"
|
||||||
],
|
],
|
||||||
@@ -375,9 +375,9 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
"node_modules/@lancedb/vectordb-win32-x64-msvc": {
|
"node_modules/@lancedb/vectordb-win32-x64-msvc": {
|
||||||
"version": "0.20.0",
|
"version": "0.20.1-beta.2",
|
||||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.20.0.tgz",
|
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.20.1-beta.2.tgz",
|
||||||
"integrity": "sha512-0G5FD8X9S70hH4QK4S2m7TrWCIlVr4vox4Rjhfqdxk/5QWwYVT6WltvPgTJlektI7sUWeioDNmluHzqLZKDlHQ==",
|
"integrity": "sha512-VkjNpqhK3l3uHLLPmox+HrmKPMaZgV+qsGQWx0nfseGnSOEmXAWZWQFe0APVCQ9y0xTypQB0oH7eSOPZv2t4WQ==",
|
||||||
"cpu": [
|
"cpu": [
|
||||||
"x64"
|
"x64"
|
||||||
],
|
],
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "vectordb",
|
"name": "vectordb",
|
||||||
"version": "0.20.0",
|
"version": "0.20.1-beta.2",
|
||||||
"description": " Serverless, low-latency vector database for AI applications",
|
"description": " Serverless, low-latency vector database for AI applications",
|
||||||
"private": false,
|
"private": false,
|
||||||
"main": "dist/index.js",
|
"main": "dist/index.js",
|
||||||
@@ -89,10 +89,10 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"optionalDependencies": {
|
"optionalDependencies": {
|
||||||
"@lancedb/vectordb-darwin-x64": "0.20.0",
|
"@lancedb/vectordb-darwin-x64": "0.20.1-beta.2",
|
||||||
"@lancedb/vectordb-darwin-arm64": "0.20.0",
|
"@lancedb/vectordb-darwin-arm64": "0.20.1-beta.2",
|
||||||
"@lancedb/vectordb-linux-x64-gnu": "0.20.0",
|
"@lancedb/vectordb-linux-x64-gnu": "0.20.1-beta.2",
|
||||||
"@lancedb/vectordb-linux-arm64-gnu": "0.20.0",
|
"@lancedb/vectordb-linux-arm64-gnu": "0.20.1-beta.2",
|
||||||
"@lancedb/vectordb-win32-x64-msvc": "0.20.0"
|
"@lancedb/vectordb-win32-x64-msvc": "0.20.1-beta.2"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "lancedb-nodejs"
|
name = "lancedb-nodejs"
|
||||||
edition.workspace = true
|
edition.workspace = true
|
||||||
version = "0.20.0"
|
version = "0.20.1-beta.2"
|
||||||
license.workspace = true
|
license.workspace = true
|
||||||
description.workspace = true
|
description.workspace = true
|
||||||
repository.workspace = true
|
repository.workspace = true
|
||||||
|
|||||||
@@ -592,14 +592,14 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
|
|||||||
).rejects.toThrow("column vector was missing");
|
).rejects.toThrow("column vector was missing");
|
||||||
});
|
});
|
||||||
|
|
||||||
it("will provide a nice error if run twice", async function () {
|
it("will skip embedding application if already applied", async function () {
|
||||||
const records = sampleRecords();
|
const records = sampleRecords();
|
||||||
const table = await convertToTable(records, dummyEmbeddingConfig);
|
const table = await convertToTable(records, dummyEmbeddingConfig);
|
||||||
|
|
||||||
// fromTableToBuffer will try and apply the embeddings again
|
// fromTableToBuffer will try and apply the embeddings again
|
||||||
await expect(
|
// but should skip since the column already has non-null values
|
||||||
fromTableToBuffer(table, dummyEmbeddingConfig),
|
const result = await fromTableToBuffer(table, dummyEmbeddingConfig);
|
||||||
).rejects.toThrow("already existed");
|
expect(result.byteLength).toBeGreaterThan(0);
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|||||||
@@ -1650,13 +1650,25 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
|
|||||||
expect(resultSet.has("fob")).toBe(true);
|
expect(resultSet.has("fob")).toBe(true);
|
||||||
expect(resultSet.has("fo")).toBe(true);
|
expect(resultSet.has("fo")).toBe(true);
|
||||||
expect(resultSet.has("food")).toBe(true);
|
expect(resultSet.has("food")).toBe(true);
|
||||||
|
|
||||||
|
const prefixResults = await table
|
||||||
|
.search(
|
||||||
|
new MatchQuery("foo", "text", { fuzziness: 3, prefixLength: 3 }),
|
||||||
|
)
|
||||||
|
.toArray();
|
||||||
|
expect(prefixResults.length).toBe(2);
|
||||||
|
const resultSet2 = new Set(prefixResults.map((r) => r.text));
|
||||||
|
expect(resultSet2.has("foo")).toBe(true);
|
||||||
|
expect(resultSet2.has("food")).toBe(true);
|
||||||
});
|
});
|
||||||
|
|
||||||
test("full text search boolean query", async () => {
|
test("full text search boolean query", async () => {
|
||||||
const db = await connect(tmpDir.name);
|
const db = await connect(tmpDir.name);
|
||||||
const data = [
|
const data = [
|
||||||
{ text: "hello world", vector: [0.1, 0.2, 0.3] },
|
{ text: "The cat and dog are playing" },
|
||||||
{ text: "goodbye world", vector: [0.4, 0.5, 0.6] },
|
{ text: "The cat is sleeping" },
|
||||||
|
{ text: "The dog is barking" },
|
||||||
|
{ text: "The dog chases the cat" },
|
||||||
];
|
];
|
||||||
const table = await db.createTable("test", data);
|
const table = await db.createTable("test", data);
|
||||||
await table.createIndex("text", {
|
await table.createIndex("text", {
|
||||||
@@ -1666,22 +1678,32 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
|
|||||||
const shouldResults = await table
|
const shouldResults = await table
|
||||||
.search(
|
.search(
|
||||||
new BooleanQuery([
|
new BooleanQuery([
|
||||||
[Occur.Should, new MatchQuery("hello", "text")],
|
[Occur.Should, new MatchQuery("cat", "text")],
|
||||||
[Occur.Should, new MatchQuery("goodbye", "text")],
|
[Occur.Should, new MatchQuery("dog", "text")],
|
||||||
]),
|
]),
|
||||||
)
|
)
|
||||||
.toArray();
|
.toArray();
|
||||||
expect(shouldResults.length).toBe(2);
|
expect(shouldResults.length).toBe(4);
|
||||||
|
|
||||||
const mustResults = await table
|
const mustResults = await table
|
||||||
.search(
|
.search(
|
||||||
new BooleanQuery([
|
new BooleanQuery([
|
||||||
[Occur.Must, new MatchQuery("hello", "text")],
|
[Occur.Must, new MatchQuery("cat", "text")],
|
||||||
[Occur.Must, new MatchQuery("world", "text")],
|
[Occur.Must, new MatchQuery("dog", "text")],
|
||||||
]),
|
]),
|
||||||
)
|
)
|
||||||
.toArray();
|
.toArray();
|
||||||
expect(mustResults.length).toBe(1);
|
expect(mustResults.length).toBe(2);
|
||||||
|
|
||||||
|
const mustNotResults = await table
|
||||||
|
.search(
|
||||||
|
new BooleanQuery([
|
||||||
|
[Occur.Must, new MatchQuery("cat", "text")],
|
||||||
|
[Occur.MustNot, new MatchQuery("dog", "text")],
|
||||||
|
]),
|
||||||
|
)
|
||||||
|
.toArray();
|
||||||
|
expect(mustNotResults.length).toBe(1);
|
||||||
});
|
});
|
||||||
|
|
||||||
test.each([
|
test.each([
|
||||||
|
|||||||
@@ -417,7 +417,9 @@ function inferSchema(
|
|||||||
} else {
|
} else {
|
||||||
const inferredType = inferType(value, path, opts);
|
const inferredType = inferType(value, path, opts);
|
||||||
if (inferredType === undefined) {
|
if (inferredType === undefined) {
|
||||||
throw new Error(`Failed to infer data type for field ${path.join(".")} at row ${rowI}. \
|
throw new Error(`Failed to infer data type for field ${path.join(
|
||||||
|
".",
|
||||||
|
)} at row ${rowI}. \
|
||||||
Consider providing an explicit schema.`);
|
Consider providing an explicit schema.`);
|
||||||
}
|
}
|
||||||
pathTree.set(path, inferredType);
|
pathTree.set(path, inferredType);
|
||||||
@@ -799,11 +801,17 @@ async function applyEmbeddingsFromMetadata(
|
|||||||
`Cannot apply embedding function because the source column '${functionEntry.sourceColumn}' was not present in the data`,
|
`Cannot apply embedding function because the source column '${functionEntry.sourceColumn}' was not present in the data`,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Check if destination column exists and handle accordingly
|
||||||
if (columns[destColumn] !== undefined) {
|
if (columns[destColumn] !== undefined) {
|
||||||
throw new Error(
|
const existingColumn = columns[destColumn];
|
||||||
`Attempt to apply embeddings to table failed because column ${destColumn} already existed`,
|
// If the column exists but is all null, we can fill it with embeddings
|
||||||
);
|
if (existingColumn.nullCount !== existingColumn.length) {
|
||||||
|
// Column has non-null values, skip embedding application
|
||||||
|
continue;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (table.batches.length > 1) {
|
if (table.batches.length > 1) {
|
||||||
throw new Error(
|
throw new Error(
|
||||||
"Internal error: `makeArrowTable` unexpectedly created a table with more than one batch",
|
"Internal error: `makeArrowTable` unexpectedly created a table with more than one batch",
|
||||||
@@ -903,11 +911,23 @@ async function applyEmbeddings<T>(
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
// Check if destination column exists and handle accordingly
|
||||||
if (Object.prototype.hasOwnProperty.call(newColumns, destColumn)) {
|
if (Object.prototype.hasOwnProperty.call(newColumns, destColumn)) {
|
||||||
throw new Error(
|
const existingColumn = newColumns[destColumn];
|
||||||
`Attempt to apply embeddings to table failed because column ${destColumn} already existed`,
|
// If the column exists but is all null, we can fill it with embeddings
|
||||||
|
if (existingColumn.nullCount !== existingColumn.length) {
|
||||||
|
// Column has non-null values, skip embedding application and return table as-is
|
||||||
|
let newTable = new ArrowTable(newColumns);
|
||||||
|
if (schema != null) {
|
||||||
|
newTable = alignTable(newTable, schema as Schema);
|
||||||
|
}
|
||||||
|
return new ArrowTable(
|
||||||
|
new Schema(newTable.schema.fields, schemaMetadata),
|
||||||
|
newTable.batches,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (table.batches.length > 1) {
|
if (table.batches.length > 1) {
|
||||||
throw new Error(
|
throw new Error(
|
||||||
"Internal error: `makeArrowTable` unexpectedly created a table with more than one batch",
|
"Internal error: `makeArrowTable` unexpectedly created a table with more than one batch",
|
||||||
|
|||||||
@@ -812,10 +812,12 @@ export enum Operator {
|
|||||||
*
|
*
|
||||||
* - `Must`: The term must be present in the document.
|
* - `Must`: The term must be present in the document.
|
||||||
* - `Should`: The term should contribute to the document score, but is not required.
|
* - `Should`: The term should contribute to the document score, but is not required.
|
||||||
|
* - `MustNot`: The term must not be present in the document.
|
||||||
*/
|
*/
|
||||||
export enum Occur {
|
export enum Occur {
|
||||||
Must = "MUST",
|
|
||||||
Should = "SHOULD",
|
Should = "SHOULD",
|
||||||
|
Must = "MUST",
|
||||||
|
MustNot = "MUST_NOT",
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -856,6 +858,7 @@ export class MatchQuery implements FullTextQuery {
|
|||||||
* - `fuzziness`: The fuzziness level for the query (default is 0).
|
* - `fuzziness`: The fuzziness level for the query (default is 0).
|
||||||
* - `maxExpansions`: The maximum number of terms to consider for fuzzy matching (default is 50).
|
* - `maxExpansions`: The maximum number of terms to consider for fuzzy matching (default is 50).
|
||||||
* - `operator`: The logical operator to use for combining terms in the query (default is "OR").
|
* - `operator`: The logical operator to use for combining terms in the query (default is "OR").
|
||||||
|
* - `prefixLength`: The number of beginning characters being unchanged for fuzzy matching.
|
||||||
*/
|
*/
|
||||||
constructor(
|
constructor(
|
||||||
query: string,
|
query: string,
|
||||||
@@ -865,6 +868,7 @@ export class MatchQuery implements FullTextQuery {
|
|||||||
fuzziness?: number;
|
fuzziness?: number;
|
||||||
maxExpansions?: number;
|
maxExpansions?: number;
|
||||||
operator?: Operator;
|
operator?: Operator;
|
||||||
|
prefixLength?: number;
|
||||||
},
|
},
|
||||||
) {
|
) {
|
||||||
let fuzziness = options?.fuzziness;
|
let fuzziness = options?.fuzziness;
|
||||||
@@ -878,6 +882,7 @@ export class MatchQuery implements FullTextQuery {
|
|||||||
fuzziness,
|
fuzziness,
|
||||||
options?.maxExpansions ?? 50,
|
options?.maxExpansions ?? 50,
|
||||||
options?.operator ?? Operator.Or,
|
options?.operator ?? Operator.Or,
|
||||||
|
options?.prefixLength ?? 0,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-darwin-arm64",
|
"name": "@lancedb/lancedb-darwin-arm64",
|
||||||
"version": "0.20.0",
|
"version": "0.20.1-beta.2",
|
||||||
"os": ["darwin"],
|
"os": ["darwin"],
|
||||||
"cpu": ["arm64"],
|
"cpu": ["arm64"],
|
||||||
"main": "lancedb.darwin-arm64.node",
|
"main": "lancedb.darwin-arm64.node",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-darwin-x64",
|
"name": "@lancedb/lancedb-darwin-x64",
|
||||||
"version": "0.20.0",
|
"version": "0.20.1-beta.2",
|
||||||
"os": ["darwin"],
|
"os": ["darwin"],
|
||||||
"cpu": ["x64"],
|
"cpu": ["x64"],
|
||||||
"main": "lancedb.darwin-x64.node",
|
"main": "lancedb.darwin-x64.node",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-linux-arm64-gnu",
|
"name": "@lancedb/lancedb-linux-arm64-gnu",
|
||||||
"version": "0.20.0",
|
"version": "0.20.1-beta.2",
|
||||||
"os": ["linux"],
|
"os": ["linux"],
|
||||||
"cpu": ["arm64"],
|
"cpu": ["arm64"],
|
||||||
"main": "lancedb.linux-arm64-gnu.node",
|
"main": "lancedb.linux-arm64-gnu.node",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-linux-arm64-musl",
|
"name": "@lancedb/lancedb-linux-arm64-musl",
|
||||||
"version": "0.20.0",
|
"version": "0.20.1-beta.2",
|
||||||
"os": ["linux"],
|
"os": ["linux"],
|
||||||
"cpu": ["arm64"],
|
"cpu": ["arm64"],
|
||||||
"main": "lancedb.linux-arm64-musl.node",
|
"main": "lancedb.linux-arm64-musl.node",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-linux-x64-gnu",
|
"name": "@lancedb/lancedb-linux-x64-gnu",
|
||||||
"version": "0.20.0",
|
"version": "0.20.1-beta.2",
|
||||||
"os": ["linux"],
|
"os": ["linux"],
|
||||||
"cpu": ["x64"],
|
"cpu": ["x64"],
|
||||||
"main": "lancedb.linux-x64-gnu.node",
|
"main": "lancedb.linux-x64-gnu.node",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-linux-x64-musl",
|
"name": "@lancedb/lancedb-linux-x64-musl",
|
||||||
"version": "0.20.0",
|
"version": "0.20.1-beta.2",
|
||||||
"os": ["linux"],
|
"os": ["linux"],
|
||||||
"cpu": ["x64"],
|
"cpu": ["x64"],
|
||||||
"main": "lancedb.linux-x64-musl.node",
|
"main": "lancedb.linux-x64-musl.node",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-win32-arm64-msvc",
|
"name": "@lancedb/lancedb-win32-arm64-msvc",
|
||||||
"version": "0.20.0",
|
"version": "0.20.1-beta.2",
|
||||||
"os": [
|
"os": [
|
||||||
"win32"
|
"win32"
|
||||||
],
|
],
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-win32-x64-msvc",
|
"name": "@lancedb/lancedb-win32-x64-msvc",
|
||||||
"version": "0.20.0",
|
"version": "0.20.1-beta.2",
|
||||||
"os": ["win32"],
|
"os": ["win32"],
|
||||||
"cpu": ["x64"],
|
"cpu": ["x64"],
|
||||||
"main": "lancedb.win32-x64-msvc.node",
|
"main": "lancedb.win32-x64-msvc.node",
|
||||||
|
|||||||
4
nodejs/package-lock.json
generated
4
nodejs/package-lock.json
generated
@@ -1,12 +1,12 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb",
|
"name": "@lancedb/lancedb",
|
||||||
"version": "0.20.0",
|
"version": "0.20.1-beta.2",
|
||||||
"lockfileVersion": 3,
|
"lockfileVersion": 3,
|
||||||
"requires": true,
|
"requires": true,
|
||||||
"packages": {
|
"packages": {
|
||||||
"": {
|
"": {
|
||||||
"name": "@lancedb/lancedb",
|
"name": "@lancedb/lancedb",
|
||||||
"version": "0.20.0",
|
"version": "0.20.1-beta.2",
|
||||||
"cpu": [
|
"cpu": [
|
||||||
"x64",
|
"x64",
|
||||||
"arm64"
|
"arm64"
|
||||||
|
|||||||
@@ -11,7 +11,7 @@
|
|||||||
"ann"
|
"ann"
|
||||||
],
|
],
|
||||||
"private": false,
|
"private": false,
|
||||||
"version": "0.20.0",
|
"version": "0.20.1-beta.2",
|
||||||
"main": "dist/index.js",
|
"main": "dist/index.js",
|
||||||
"exports": {
|
"exports": {
|
||||||
".": "./dist/index.js",
|
".": "./dist/index.js",
|
||||||
|
|||||||
@@ -335,6 +335,7 @@ impl JsFullTextQuery {
|
|||||||
fuzziness: Option<u32>,
|
fuzziness: Option<u32>,
|
||||||
max_expansions: u32,
|
max_expansions: u32,
|
||||||
operator: String,
|
operator: String,
|
||||||
|
prefix_length: u32,
|
||||||
) -> napi::Result<Self> {
|
) -> napi::Result<Self> {
|
||||||
Ok(Self {
|
Ok(Self {
|
||||||
inner: MatchQuery::new(query)
|
inner: MatchQuery::new(query)
|
||||||
@@ -347,6 +348,7 @@ impl JsFullTextQuery {
|
|||||||
napi::Error::from_reason(format!("Invalid operator: {}", e))
|
napi::Error::from_reason(format!("Invalid operator: {}", e))
|
||||||
})?,
|
})?,
|
||||||
)
|
)
|
||||||
|
.with_prefix_length(prefix_length)
|
||||||
.into(),
|
.into(),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
[tool.bumpversion]
|
[tool.bumpversion]
|
||||||
current_version = "0.23.1-beta.0"
|
current_version = "0.24.0"
|
||||||
parse = """(?x)
|
parse = """(?x)
|
||||||
(?P<major>0|[1-9]\\d*)\\.
|
(?P<major>0|[1-9]\\d*)\\.
|
||||||
(?P<minor>0|[1-9]\\d*)\\.
|
(?P<minor>0|[1-9]\\d*)\\.
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "lancedb-python"
|
name = "lancedb-python"
|
||||||
version = "0.23.1-beta.0"
|
version = "0.24.0"
|
||||||
edition.workspace = true
|
edition.workspace = true
|
||||||
description = "Python bindings for LanceDB"
|
description = "Python bindings for LanceDB"
|
||||||
license.workspace = true
|
license.workspace = true
|
||||||
|
|||||||
@@ -101,8 +101,9 @@ class FullTextOperator(str, Enum):
|
|||||||
|
|
||||||
|
|
||||||
class Occur(str, Enum):
|
class Occur(str, Enum):
|
||||||
MUST = "MUST"
|
|
||||||
SHOULD = "SHOULD"
|
SHOULD = "SHOULD"
|
||||||
|
MUST = "MUST"
|
||||||
|
MUST_NOT = "MUST_NOT"
|
||||||
|
|
||||||
|
|
||||||
@pydantic.dataclasses.dataclass
|
@pydantic.dataclasses.dataclass
|
||||||
@@ -181,6 +182,9 @@ class MatchQuery(FullTextQuery):
|
|||||||
Can be either `AND` or `OR`.
|
Can be either `AND` or `OR`.
|
||||||
If `AND`, all terms in the query must match.
|
If `AND`, all terms in the query must match.
|
||||||
If `OR`, at least one term in the query must match.
|
If `OR`, at least one term in the query must match.
|
||||||
|
prefix_length : int, optional
|
||||||
|
The number of beginning characters being unchanged for fuzzy matching.
|
||||||
|
This is useful to achieve prefix matching.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
query: str
|
query: str
|
||||||
@@ -189,6 +193,7 @@ class MatchQuery(FullTextQuery):
|
|||||||
fuzziness: int = pydantic.Field(0, kw_only=True)
|
fuzziness: int = pydantic.Field(0, kw_only=True)
|
||||||
max_expansions: int = pydantic.Field(50, kw_only=True)
|
max_expansions: int = pydantic.Field(50, kw_only=True)
|
||||||
operator: FullTextOperator = pydantic.Field(FullTextOperator.OR, kw_only=True)
|
operator: FullTextOperator = pydantic.Field(FullTextOperator.OR, kw_only=True)
|
||||||
|
prefix_length: int = pydantic.Field(0, kw_only=True)
|
||||||
|
|
||||||
def query_type(self) -> FullTextQueryType:
|
def query_type(self) -> FullTextQueryType:
|
||||||
return FullTextQueryType.MATCH
|
return FullTextQueryType.MATCH
|
||||||
@@ -1446,10 +1451,13 @@ class LanceFtsQueryBuilder(LanceQueryBuilder):
|
|||||||
|
|
||||||
query = self._query
|
query = self._query
|
||||||
if self._phrase_query:
|
if self._phrase_query:
|
||||||
raise NotImplementedError(
|
if isinstance(query, str):
|
||||||
"Phrase query is not yet supported in Lance FTS. "
|
if not query.startswith('"') or not query.endswith('"'):
|
||||||
"Use tantivy-based index instead for now."
|
query = f'"{query}"'
|
||||||
)
|
elif isinstance(query, FullTextQuery) and not isinstance(
|
||||||
|
query, PhraseQuery
|
||||||
|
):
|
||||||
|
raise TypeError("Please use PhraseQuery for phrase queries.")
|
||||||
query = self.to_query_object()
|
query = self.to_query_object()
|
||||||
results = self._table._execute_query(query, timeout=timeout)
|
results = self._table._execute_query(query, timeout=timeout)
|
||||||
results = results.read_all()
|
results = results.read_all()
|
||||||
|
|||||||
@@ -827,7 +827,7 @@ class Table(ABC):
|
|||||||
ordering_field_names: Optional[Union[str, List[str]]] = None,
|
ordering_field_names: Optional[Union[str, List[str]]] = None,
|
||||||
replace: bool = False,
|
replace: bool = False,
|
||||||
writer_heap_size: Optional[int] = 1024 * 1024 * 1024,
|
writer_heap_size: Optional[int] = 1024 * 1024 * 1024,
|
||||||
use_tantivy: bool = True,
|
use_tantivy: bool = False,
|
||||||
tokenizer_name: Optional[str] = None,
|
tokenizer_name: Optional[str] = None,
|
||||||
with_position: bool = False,
|
with_position: bool = False,
|
||||||
# tokenizer configs:
|
# tokenizer configs:
|
||||||
@@ -864,7 +864,7 @@ class Table(ABC):
|
|||||||
The tokenizer to use for the index. Can be "raw", "default" or the 2 letter
|
The tokenizer to use for the index. Can be "raw", "default" or the 2 letter
|
||||||
language code followed by "_stem". So for english it would be "en_stem".
|
language code followed by "_stem". So for english it would be "en_stem".
|
||||||
For available languages see: https://docs.rs/tantivy/latest/tantivy/tokenizer/enum.Language.html
|
For available languages see: https://docs.rs/tantivy/latest/tantivy/tokenizer/enum.Language.html
|
||||||
use_tantivy: bool, default True
|
use_tantivy: bool, default False
|
||||||
If True, use the legacy full-text search implementation based on tantivy.
|
If True, use the legacy full-text search implementation based on tantivy.
|
||||||
If False, use the new full-text search implementation based on lance-index.
|
If False, use the new full-text search implementation based on lance-index.
|
||||||
with_position: bool, default False
|
with_position: bool, default False
|
||||||
@@ -1970,7 +1970,7 @@ class LanceTable(Table):
|
|||||||
ordering_field_names: Optional[Union[str, List[str]]] = None,
|
ordering_field_names: Optional[Union[str, List[str]]] = None,
|
||||||
replace: bool = False,
|
replace: bool = False,
|
||||||
writer_heap_size: Optional[int] = 1024 * 1024 * 1024,
|
writer_heap_size: Optional[int] = 1024 * 1024 * 1024,
|
||||||
use_tantivy: bool = True,
|
use_tantivy: bool = False,
|
||||||
tokenizer_name: Optional[str] = None,
|
tokenizer_name: Optional[str] = None,
|
||||||
with_position: bool = False,
|
with_position: bool = False,
|
||||||
# tokenizer configs:
|
# tokenizer configs:
|
||||||
|
|||||||
@@ -6,7 +6,7 @@ import lancedb
|
|||||||
|
|
||||||
# --8<-- [end:import-lancedb]
|
# --8<-- [end:import-lancedb]
|
||||||
# --8<-- [start:import-numpy]
|
# --8<-- [start:import-numpy]
|
||||||
from lancedb.query import BoostQuery, MatchQuery
|
from lancedb.query import BooleanQuery, BoostQuery, MatchQuery, Occur
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import pyarrow as pa
|
import pyarrow as pa
|
||||||
|
|
||||||
@@ -191,6 +191,15 @@ def test_fts_fuzzy_query():
|
|||||||
"food", # 1 insertion
|
"food", # 1 insertion
|
||||||
}
|
}
|
||||||
|
|
||||||
|
results = table.search(
|
||||||
|
MatchQuery("foo", "text", fuzziness=1, prefix_length=3)
|
||||||
|
).to_pandas()
|
||||||
|
assert len(results) == 2
|
||||||
|
assert set(results["text"].to_list()) == {
|
||||||
|
"foo",
|
||||||
|
"food",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.skipif(
|
@pytest.mark.skipif(
|
||||||
os.name == "nt", reason="Need to fix https://github.com/lancedb/lance/issues/3905"
|
os.name == "nt", reason="Need to fix https://github.com/lancedb/lance/issues/3905"
|
||||||
@@ -240,6 +249,60 @@ def test_fts_boost_query():
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.skipif(
|
||||||
|
os.name == "nt", reason="Need to fix https://github.com/lancedb/lance/issues/3905"
|
||||||
|
)
|
||||||
|
def test_fts_boolean_query(tmp_path):
|
||||||
|
uri = tmp_path / "boolean-example"
|
||||||
|
db = lancedb.connect(uri)
|
||||||
|
table = db.create_table(
|
||||||
|
"my_table_fts_boolean",
|
||||||
|
data=[
|
||||||
|
{"text": "The cat and dog are playing"},
|
||||||
|
{"text": "The cat is sleeping"},
|
||||||
|
{"text": "The dog is barking"},
|
||||||
|
{"text": "The dog chases the cat"},
|
||||||
|
],
|
||||||
|
mode="overwrite",
|
||||||
|
)
|
||||||
|
table.create_fts_index("text", use_tantivy=False, replace=True)
|
||||||
|
|
||||||
|
# SHOULD
|
||||||
|
results = table.search(
|
||||||
|
MatchQuery("cat", "text") | MatchQuery("dog", "text")
|
||||||
|
).to_pandas()
|
||||||
|
assert len(results) == 4
|
||||||
|
assert set(results["text"].to_list()) == {
|
||||||
|
"The cat and dog are playing",
|
||||||
|
"The cat is sleeping",
|
||||||
|
"The dog is barking",
|
||||||
|
"The dog chases the cat",
|
||||||
|
}
|
||||||
|
# MUST
|
||||||
|
results = table.search(
|
||||||
|
MatchQuery("cat", "text") & MatchQuery("dog", "text")
|
||||||
|
).to_pandas()
|
||||||
|
assert len(results) == 2
|
||||||
|
assert set(results["text"].to_list()) == {
|
||||||
|
"The cat and dog are playing",
|
||||||
|
"The dog chases the cat",
|
||||||
|
}
|
||||||
|
|
||||||
|
# MUST NOT
|
||||||
|
results = table.search(
|
||||||
|
BooleanQuery(
|
||||||
|
[
|
||||||
|
(Occur.MUST, MatchQuery("cat", "text")),
|
||||||
|
(Occur.MUST_NOT, MatchQuery("dog", "text")),
|
||||||
|
]
|
||||||
|
)
|
||||||
|
).to_pandas()
|
||||||
|
assert len(results) == 1
|
||||||
|
assert set(results["text"].to_list()) == {
|
||||||
|
"The cat is sleeping",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.skipif(
|
@pytest.mark.skipif(
|
||||||
os.name == "nt", reason="Need to fix https://github.com/lancedb/lance/issues/3905"
|
os.name == "nt", reason="Need to fix https://github.com/lancedb/lance/issues/3905"
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -245,7 +245,7 @@ def test_s3_dynamodb_sync(s3_bucket: str, commit_table: str, monkeypatch):
|
|||||||
NotImplementedError,
|
NotImplementedError,
|
||||||
match="Full-text search is only supported on the local filesystem",
|
match="Full-text search is only supported on the local filesystem",
|
||||||
):
|
):
|
||||||
table.create_fts_index("x")
|
table.create_fts_index("x", use_tantivy=True)
|
||||||
|
|
||||||
# make sure list tables still works
|
# make sure list tables still works
|
||||||
assert db.table_names() == ["test_ddb_sync"]
|
assert db.table_names() == ["test_ddb_sync"]
|
||||||
|
|||||||
@@ -50,6 +50,7 @@ impl FromPyObject<'_> for PyLanceDB<FtsQuery> {
|
|||||||
let fuzziness = ob.getattr("fuzziness")?.extract()?;
|
let fuzziness = ob.getattr("fuzziness")?.extract()?;
|
||||||
let max_expansions = ob.getattr("max_expansions")?.extract()?;
|
let max_expansions = ob.getattr("max_expansions")?.extract()?;
|
||||||
let operator = ob.getattr("operator")?.extract::<String>()?;
|
let operator = ob.getattr("operator")?.extract::<String>()?;
|
||||||
|
let prefix_length = ob.getattr("prefix_length")?.extract()?;
|
||||||
|
|
||||||
Ok(PyLanceDB(
|
Ok(PyLanceDB(
|
||||||
MatchQuery::new(query)
|
MatchQuery::new(query)
|
||||||
@@ -60,6 +61,7 @@ impl FromPyObject<'_> for PyLanceDB<FtsQuery> {
|
|||||||
.with_operator(Operator::try_from(operator.as_str()).map_err(|e| {
|
.with_operator(Operator::try_from(operator.as_str()).map_err(|e| {
|
||||||
PyValueError::new_err(format!("Invalid operator: {}", e))
|
PyValueError::new_err(format!("Invalid operator: {}", e))
|
||||||
})?)
|
})?)
|
||||||
|
.with_prefix_length(prefix_length)
|
||||||
.into(),
|
.into(),
|
||||||
))
|
))
|
||||||
}
|
}
|
||||||
@@ -139,7 +141,8 @@ impl<'py> IntoPyObject<'py> for PyLanceDB<FtsQuery> {
|
|||||||
kwargs.set_item("boost", query.boost)?;
|
kwargs.set_item("boost", query.boost)?;
|
||||||
kwargs.set_item("fuzziness", query.fuzziness)?;
|
kwargs.set_item("fuzziness", query.fuzziness)?;
|
||||||
kwargs.set_item("max_expansions", query.max_expansions)?;
|
kwargs.set_item("max_expansions", query.max_expansions)?;
|
||||||
kwargs.set_item("operator", operator_to_str(query.operator))?;
|
kwargs.set_item::<_, &str>("operator", query.operator.into())?;
|
||||||
|
kwargs.set_item("prefix_length", query.prefix_length)?;
|
||||||
namespace
|
namespace
|
||||||
.getattr(intern!(py, "MatchQuery"))?
|
.getattr(intern!(py, "MatchQuery"))?
|
||||||
.call((query.terms, query.column.unwrap()), Some(&kwargs))
|
.call((query.terms, query.column.unwrap()), Some(&kwargs))
|
||||||
@@ -169,19 +172,25 @@ impl<'py> IntoPyObject<'py> for PyLanceDB<FtsQuery> {
|
|||||||
.unzip();
|
.unzip();
|
||||||
let kwargs = PyDict::new(py);
|
let kwargs = PyDict::new(py);
|
||||||
kwargs.set_item("boosts", boosts)?;
|
kwargs.set_item("boosts", boosts)?;
|
||||||
kwargs.set_item("operator", operator_to_str(first.operator))?;
|
kwargs.set_item::<_, &str>("operator", first.operator.into())?;
|
||||||
namespace
|
namespace
|
||||||
.getattr(intern!(py, "MultiMatchQuery"))?
|
.getattr(intern!(py, "MultiMatchQuery"))?
|
||||||
.call((first.terms.clone(), columns), Some(&kwargs))
|
.call((first.terms.clone(), columns), Some(&kwargs))
|
||||||
}
|
}
|
||||||
FtsQuery::Boolean(query) => {
|
FtsQuery::Boolean(query) => {
|
||||||
let mut queries = Vec::with_capacity(query.must.len() + query.should.len());
|
let mut queries: Vec<(&str, Bound<'py, PyAny>)> = Vec::with_capacity(
|
||||||
for q in query.must {
|
query.should.len() + query.must.len() + query.must_not.len(),
|
||||||
queries.push((occur_to_str(Occur::Must), PyLanceDB(q).into_pyobject(py)?));
|
);
|
||||||
}
|
|
||||||
for q in query.should {
|
for q in query.should {
|
||||||
queries.push((occur_to_str(Occur::Should), PyLanceDB(q).into_pyobject(py)?));
|
queries.push((Occur::Should.into(), PyLanceDB(q).into_pyobject(py)?));
|
||||||
}
|
}
|
||||||
|
for q in query.must {
|
||||||
|
queries.push((Occur::Must.into(), PyLanceDB(q).into_pyobject(py)?));
|
||||||
|
}
|
||||||
|
for q in query.must_not {
|
||||||
|
queries.push((Occur::MustNot.into(), PyLanceDB(q).into_pyobject(py)?));
|
||||||
|
}
|
||||||
|
|
||||||
namespace
|
namespace
|
||||||
.getattr(intern!(py, "BooleanQuery"))?
|
.getattr(intern!(py, "BooleanQuery"))?
|
||||||
.call1((queries,))
|
.call1((queries,))
|
||||||
@@ -190,20 +199,6 @@ impl<'py> IntoPyObject<'py> for PyLanceDB<FtsQuery> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn operator_to_str(op: Operator) -> &'static str {
|
|
||||||
match op {
|
|
||||||
Operator::And => "AND",
|
|
||||||
Operator::Or => "OR",
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn occur_to_str(occur: Occur) -> &'static str {
|
|
||||||
match occur {
|
|
||||||
Occur::Must => "MUST",
|
|
||||||
Occur::Should => "SHOULD",
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Python representation of query vector(s)
|
// Python representation of query vector(s)
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
pub struct PyQueryVectors(Vec<Arc<dyn Array>>);
|
pub struct PyQueryVectors(Vec<Arc<dyn Array>>);
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "lancedb-node"
|
name = "lancedb-node"
|
||||||
version = "0.20.0"
|
version = "0.20.1-beta.2"
|
||||||
description = "Serverless, low-latency vector database for AI applications"
|
description = "Serverless, low-latency vector database for AI applications"
|
||||||
license.workspace = true
|
license.workspace = true
|
||||||
edition.workspace = true
|
edition.workspace = true
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "lancedb"
|
name = "lancedb"
|
||||||
version = "0.20.0"
|
version = "0.20.1-beta.2"
|
||||||
edition.workspace = true
|
edition.workspace = true
|
||||||
description = "LanceDB: A serverless, low-latency vector database for AI applications"
|
description = "LanceDB: A serverless, low-latency vector database for AI applications"
|
||||||
license.workspace = true
|
license.workspace = true
|
||||||
|
|||||||
@@ -2318,6 +2318,7 @@ mod tests {
|
|||||||
"fuzziness": 0,
|
"fuzziness": 0,
|
||||||
"max_expansions": 50,
|
"max_expansions": 50,
|
||||||
"operator": "Or",
|
"operator": "Or",
|
||||||
|
"prefix_length": 0,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
|||||||
Reference in New Issue
Block a user