Compare commits

..

7 Commits

Author SHA1 Message Date
lancedb automation
1ade8846fd chore: update lance dependency to v4.0.0-beta.6 2026-03-03 18:54:14 +00:00
Xuanwo
52ce2c995c fix(ci): only run npm publish on release tags (#3093)
This PR fixes the npm publish dry-run failure for prerelease versions
without changing the existing workflow trigger behavior. The publish
step now detects prerelease versions from `nodejs/package.json` and
appends `--tag preview` whenever the version is a prerelease.

Context:
- On `main` pushes, the workflow still runs `npm publish --dry-run` by
design.
- Recent failures were caused by prerelease versions (for example
`0.27.0-beta.3`) running without `--tag`, which npm rejects.
- The previous `refs/tags/v...-beta...` check did not apply on branch
pushes, so dry-run could fail even though release tags worked.
2026-03-04 01:35:10 +08:00
Sean Mackrory
e71a00998c ci: add regression test for fastSearch in FTS queries in TypeScript (#3090)
We recently added support for this for the Python bindings, and wanted
to confirm this already worked as expected in the TS bindings.
2026-03-03 07:09:09 -08:00
Sean Mackrory
39a2ac0a1c feat: add parity for the fast_search keyword argument between vector and FTS searches (#3091)
We don't necessarily need to do this, but one user was confused: they had
used `fast_search=True` as a keyword argument for vector searches but were
unable to do the same for FTS, even after the most recent changes. I
think this is the only place where the two search types differed.
2026-03-03 05:21:36 -08:00
Wyatt Alt
bc7b344fa4 feat: add support for remote index params (#3087)
Prior to this commit the remote SDK did not support the full set of
index parameters. This extends the SDK to support them.
2026-03-02 11:14:28 -08:00
Will Jones
f91d2f5fec ci(python): pin maturin to work around bug (#3088)
Workaround for https://github.com/PyO3/maturin/issues/3059
2026-03-02 09:38:54 -08:00
Wyatt Alt
cf81b6419f feat: add num_deleted_rows to delete result (#3077) 2026-03-02 08:37:14 -08:00
16 changed files with 308 additions and 179 deletions

View File

@@ -29,6 +29,7 @@ runs:
if: ${{ inputs.arm-build == 'false' }}
uses: PyO3/maturin-action@v1
with:
maturin-version: "1.12.4"
command: build
working-directory: python
docker-options: "-e PIP_EXTRA_INDEX_URL='https://pypi.fury.io/lance-format/ https://pypi.fury.io/lancedb/'"
@@ -44,6 +45,7 @@ runs:
if: ${{ inputs.arm-build == 'true' }}
uses: PyO3/maturin-action@v1
with:
maturin-version: "1.12.4"
command: build
working-directory: python
docker-options: "-e PIP_EXTRA_INDEX_URL='https://pypi.fury.io/lance-format/ https://pypi.fury.io/lancedb/'"

View File

@@ -20,6 +20,7 @@ runs:
uses: PyO3/maturin-action@v1
with:
command: build
maturin-version: "1.12.4"
# TODO: pass through interpreter
args: ${{ inputs.args }}
docker-options: "-e PIP_EXTRA_INDEX_URL='https://pypi.fury.io/lance-format/ https://pypi.fury.io/lancedb/'"

View File

@@ -25,6 +25,7 @@ runs:
uses: PyO3/maturin-action@v1
with:
command: build
maturin-version: "1.12.4"
args: ${{ inputs.args }}
docker-options: "-e PIP_EXTRA_INDEX_URL='https://pypi.fury.io/lance-format/ https://pypi.fury.io/lancedb/'"
working-directory: python

View File

@@ -356,7 +356,8 @@ jobs:
if [[ $DRY_RUN == "true" ]]; then
ARGS="$ARGS --dry-run"
fi
if [[ $GITHUB_REF =~ refs/tags/v(.*)-beta.* ]]; then
VERSION=$(node -p "require('./package.json').version")
if [[ $VERSION == *-* ]]; then
ARGS="$ARGS --tag preview"
fi
npm publish $ARGS

View File

@@ -10,6 +10,10 @@ on:
- python/**
- rust/**
- .github/workflows/python.yml
- .github/workflows/build_linux_wheel/**
- .github/workflows/build_mac_wheel/**
- .github/workflows/build_windows_wheel/**
- .github/workflows/run_tests/**
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}

111
Cargo.lock generated
View File

@@ -3088,8 +3088,8 @@ checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c"
[[package]]
name = "fsst"
version = "3.0.0-rc.2"
source = "git+https://github.com/lance-format/lance.git?tag=v3.0.0-rc.2#3fb3e705b8a25ab1bb0fc9e1e0158e8a13356181"
version = "4.0.0-beta.6"
source = "git+https://github.com/lance-format/lance.git?tag=v4.0.0-beta.6#623fc908348ba4fa708357b56f430ef4132bbe8c"
dependencies = [
"arrow-array",
"rand 0.9.2",
@@ -4260,8 +4260,8 @@ dependencies = [
[[package]]
name = "lance"
version = "3.0.0-rc.2"
source = "git+https://github.com/lance-format/lance.git?tag=v3.0.0-rc.2#3fb3e705b8a25ab1bb0fc9e1e0158e8a13356181"
version = "4.0.0-beta.6"
source = "git+https://github.com/lance-format/lance.git?tag=v4.0.0-beta.6#623fc908348ba4fa708357b56f430ef4132bbe8c"
dependencies = [
"arrow",
"arrow-arith",
@@ -4315,7 +4315,7 @@ dependencies = [
"semver",
"serde",
"serde_json",
"snafu",
"snafu 0.9.0",
"tantivy",
"tokio",
"tokio-stream",
@@ -4327,8 +4327,8 @@ dependencies = [
[[package]]
name = "lance-arrow"
version = "3.0.0-rc.2"
source = "git+https://github.com/lance-format/lance.git?tag=v3.0.0-rc.2#3fb3e705b8a25ab1bb0fc9e1e0158e8a13356181"
version = "4.0.0-beta.6"
source = "git+https://github.com/lance-format/lance.git?tag=v4.0.0-beta.6#623fc908348ba4fa708357b56f430ef4132bbe8c"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -4347,8 +4347,8 @@ dependencies = [
[[package]]
name = "lance-bitpacking"
version = "3.0.0-rc.2"
source = "git+https://github.com/lance-format/lance.git?tag=v3.0.0-rc.2#3fb3e705b8a25ab1bb0fc9e1e0158e8a13356181"
version = "4.0.0-beta.6"
source = "git+https://github.com/lance-format/lance.git?tag=v4.0.0-beta.6#623fc908348ba4fa708357b56f430ef4132bbe8c"
dependencies = [
"arrayref",
"paste",
@@ -4357,8 +4357,8 @@ dependencies = [
[[package]]
name = "lance-core"
version = "3.0.0-rc.2"
source = "git+https://github.com/lance-format/lance.git?tag=v3.0.0-rc.2#3fb3e705b8a25ab1bb0fc9e1e0158e8a13356181"
version = "4.0.0-beta.6"
source = "git+https://github.com/lance-format/lance.git?tag=v4.0.0-beta.6#623fc908348ba4fa708357b56f430ef4132bbe8c"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -4384,7 +4384,7 @@ dependencies = [
"rand 0.9.2",
"roaring",
"serde_json",
"snafu",
"snafu 0.9.0",
"tempfile",
"tokio",
"tokio-stream",
@@ -4395,8 +4395,8 @@ dependencies = [
[[package]]
name = "lance-datafusion"
version = "3.0.0-rc.2"
source = "git+https://github.com/lance-format/lance.git?tag=v3.0.0-rc.2#3fb3e705b8a25ab1bb0fc9e1e0158e8a13356181"
version = "4.0.0-beta.6"
source = "git+https://github.com/lance-format/lance.git?tag=v4.0.0-beta.6#623fc908348ba4fa708357b56f430ef4132bbe8c"
dependencies = [
"arrow",
"arrow-array",
@@ -4419,15 +4419,15 @@ dependencies = [
"pin-project",
"prost",
"prost-build",
"snafu",
"snafu 0.9.0",
"tokio",
"tracing",
]
[[package]]
name = "lance-datagen"
version = "3.0.0-rc.2"
source = "git+https://github.com/lance-format/lance.git?tag=v3.0.0-rc.2#3fb3e705b8a25ab1bb0fc9e1e0158e8a13356181"
version = "4.0.0-beta.6"
source = "git+https://github.com/lance-format/lance.git?tag=v4.0.0-beta.6#623fc908348ba4fa708357b56f430ef4132bbe8c"
dependencies = [
"arrow",
"arrow-array",
@@ -4445,8 +4445,8 @@ dependencies = [
[[package]]
name = "lance-encoding"
version = "3.0.0-rc.2"
source = "git+https://github.com/lance-format/lance.git?tag=v3.0.0-rc.2#3fb3e705b8a25ab1bb0fc9e1e0158e8a13356181"
version = "4.0.0-beta.6"
source = "git+https://github.com/lance-format/lance.git?tag=v4.0.0-beta.6#623fc908348ba4fa708357b56f430ef4132bbe8c"
dependencies = [
"arrow-arith",
"arrow-array",
@@ -4473,7 +4473,7 @@ dependencies = [
"prost-build",
"prost-types",
"rand 0.9.2",
"snafu",
"snafu 0.9.0",
"strum",
"tokio",
"tracing",
@@ -4483,8 +4483,8 @@ dependencies = [
[[package]]
name = "lance-file"
version = "3.0.0-rc.2"
source = "git+https://github.com/lance-format/lance.git?tag=v3.0.0-rc.2#3fb3e705b8a25ab1bb0fc9e1e0158e8a13356181"
version = "4.0.0-beta.6"
source = "git+https://github.com/lance-format/lance.git?tag=v4.0.0-beta.6#623fc908348ba4fa708357b56f430ef4132bbe8c"
dependencies = [
"arrow-arith",
"arrow-array",
@@ -4509,15 +4509,15 @@ dependencies = [
"prost",
"prost-build",
"prost-types",
"snafu",
"snafu 0.9.0",
"tokio",
"tracing",
]
[[package]]
name = "lance-index"
version = "3.0.0-rc.2"
source = "git+https://github.com/lance-format/lance.git?tag=v3.0.0-rc.2#3fb3e705b8a25ab1bb0fc9e1e0158e8a13356181"
version = "4.0.0-beta.6"
source = "git+https://github.com/lance-format/lance.git?tag=v4.0.0-beta.6#623fc908348ba4fa708357b56f430ef4132bbe8c"
dependencies = [
"arrow",
"arrow-arith",
@@ -4569,7 +4569,7 @@ dependencies = [
"serde",
"serde_json",
"smallvec",
"snafu",
"snafu 0.9.0",
"tantivy",
"tempfile",
"tokio",
@@ -4580,8 +4580,8 @@ dependencies = [
[[package]]
name = "lance-io"
version = "3.0.0-rc.2"
source = "git+https://github.com/lance-format/lance.git?tag=v3.0.0-rc.2#3fb3e705b8a25ab1bb0fc9e1e0158e8a13356181"
version = "4.0.0-beta.6"
source = "git+https://github.com/lance-format/lance.git?tag=v4.0.0-beta.6#623fc908348ba4fa708357b56f430ef4132bbe8c"
dependencies = [
"arrow",
"arrow-arith",
@@ -4613,7 +4613,7 @@ dependencies = [
"prost",
"rand 0.9.2",
"serde",
"snafu",
"snafu 0.9.0",
"tempfile",
"tokio",
"tracing",
@@ -4622,8 +4622,8 @@ dependencies = [
[[package]]
name = "lance-linalg"
version = "3.0.0-rc.2"
source = "git+https://github.com/lance-format/lance.git?tag=v3.0.0-rc.2#3fb3e705b8a25ab1bb0fc9e1e0158e8a13356181"
version = "4.0.0-beta.6"
source = "git+https://github.com/lance-format/lance.git?tag=v4.0.0-beta.6#623fc908348ba4fa708357b56f430ef4132bbe8c"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -4639,21 +4639,21 @@ dependencies = [
[[package]]
name = "lance-namespace"
version = "3.0.0-rc.2"
source = "git+https://github.com/lance-format/lance.git?tag=v3.0.0-rc.2#3fb3e705b8a25ab1bb0fc9e1e0158e8a13356181"
version = "4.0.0-beta.6"
source = "git+https://github.com/lance-format/lance.git?tag=v4.0.0-beta.6#623fc908348ba4fa708357b56f430ef4132bbe8c"
dependencies = [
"arrow",
"async-trait",
"bytes",
"lance-core",
"lance-namespace-reqwest-client",
"snafu",
"snafu 0.9.0",
]
[[package]]
name = "lance-namespace-impls"
version = "3.0.0-rc.2"
source = "git+https://github.com/lance-format/lance.git?tag=v3.0.0-rc.2#3fb3e705b8a25ab1bb0fc9e1e0158e8a13356181"
version = "4.0.0-beta.6"
source = "git+https://github.com/lance-format/lance.git?tag=v4.0.0-beta.6#623fc908348ba4fa708357b56f430ef4132bbe8c"
dependencies = [
"arrow",
"arrow-ipc",
@@ -4675,7 +4675,7 @@ dependencies = [
"reqwest",
"serde",
"serde_json",
"snafu",
"snafu 0.9.0",
"tokio",
"tower",
"tower-http 0.5.2",
@@ -4697,8 +4697,8 @@ dependencies = [
[[package]]
name = "lance-table"
version = "3.0.0-rc.2"
source = "git+https://github.com/lance-format/lance.git?tag=v3.0.0-rc.2#3fb3e705b8a25ab1bb0fc9e1e0158e8a13356181"
version = "4.0.0-beta.6"
source = "git+https://github.com/lance-format/lance.git?tag=v4.0.0-beta.6#623fc908348ba4fa708357b56f430ef4132bbe8c"
dependencies = [
"arrow",
"arrow-array",
@@ -4728,7 +4728,7 @@ dependencies = [
"semver",
"serde",
"serde_json",
"snafu",
"snafu 0.9.0",
"tokio",
"tracing",
"url",
@@ -4737,8 +4737,8 @@ dependencies = [
[[package]]
name = "lance-testing"
version = "3.0.0-rc.2"
source = "git+https://github.com/lance-format/lance.git?tag=v3.0.0-rc.2#3fb3e705b8a25ab1bb0fc9e1e0158e8a13356181"
version = "4.0.0-beta.6"
source = "git+https://github.com/lance-format/lance.git?tag=v4.0.0-beta.6#623fc908348ba4fa708357b56f430ef4132bbe8c"
dependencies = [
"arrow-array",
"arrow-schema",
@@ -4819,7 +4819,7 @@ dependencies = [
"serde",
"serde_json",
"serde_with",
"snafu",
"snafu 0.8.9",
"tempfile",
"test-log",
"tokenizers",
@@ -4865,7 +4865,7 @@ dependencies = [
"pyo3",
"pyo3-async-runtimes",
"pyo3-build-config",
"snafu",
"snafu 0.8.9",
"tokio",
]
@@ -7777,7 +7777,16 @@ version = "0.8.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6e84b3f4eacbf3a1ce05eac6763b4d629d60cbc94d632e4092c54ade71f1e1a2"
dependencies = [
"snafu-derive",
"snafu-derive 0.8.9",
]
[[package]]
name = "snafu"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d1d4bced6a69f90b2056c03dcff2c4737f98d6fb9e0853493996e1d253ca29c6"
dependencies = [
"snafu-derive 0.9.0",
]
[[package]]
@@ -7792,6 +7801,18 @@ dependencies = [
"syn 2.0.114",
]
[[package]]
name = "snafu-derive"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "54254b8531cafa275c5e096f62d48c81435d1015405a91198ddb11e967301d40"
dependencies = [
"heck 0.4.1",
"proc-macro2",
"quote",
"syn 2.0.114",
]
[[package]]
name = "socket2"
version = "0.5.10"

View File

@@ -15,20 +15,20 @@ categories = ["database-implementations"]
rust-version = "1.91.0"
[workspace.dependencies]
lance = { "version" = "=3.0.0-rc.2", default-features = false, "tag" = "v3.0.0-rc.2", "git" = "https://github.com/lance-format/lance.git" }
lance-core = { "version" = "=3.0.0-rc.2", "tag" = "v3.0.0-rc.2", "git" = "https://github.com/lance-format/lance.git" }
lance-datagen = { "version" = "=3.0.0-rc.2", "tag" = "v3.0.0-rc.2", "git" = "https://github.com/lance-format/lance.git" }
lance-file = { "version" = "=3.0.0-rc.2", "tag" = "v3.0.0-rc.2", "git" = "https://github.com/lance-format/lance.git" }
lance-io = { "version" = "=3.0.0-rc.2", default-features = false, "tag" = "v3.0.0-rc.2", "git" = "https://github.com/lance-format/lance.git" }
lance-index = { "version" = "=3.0.0-rc.2", "tag" = "v3.0.0-rc.2", "git" = "https://github.com/lance-format/lance.git" }
lance-linalg = { "version" = "=3.0.0-rc.2", "tag" = "v3.0.0-rc.2", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace = { "version" = "=3.0.0-rc.2", "tag" = "v3.0.0-rc.2", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace-impls = { "version" = "=3.0.0-rc.2", default-features = false, "tag" = "v3.0.0-rc.2", "git" = "https://github.com/lance-format/lance.git" }
lance-table = { "version" = "=3.0.0-rc.2", "tag" = "v3.0.0-rc.2", "git" = "https://github.com/lance-format/lance.git" }
lance-testing = { "version" = "=3.0.0-rc.2", "tag" = "v3.0.0-rc.2", "git" = "https://github.com/lance-format/lance.git" }
lance-datafusion = { "version" = "=3.0.0-rc.2", "tag" = "v3.0.0-rc.2", "git" = "https://github.com/lance-format/lance.git" }
lance-encoding = { "version" = "=3.0.0-rc.2", "tag" = "v3.0.0-rc.2", "git" = "https://github.com/lance-format/lance.git" }
lance-arrow = { "version" = "=3.0.0-rc.2", "tag" = "v3.0.0-rc.2", "git" = "https://github.com/lance-format/lance.git" }
lance = { "version" = "=4.0.0-beta.6", default-features = false, "tag" = "v4.0.0-beta.6", "git" = "https://github.com/lance-format/lance.git" }
lance-core = { "version" = "=4.0.0-beta.6", "tag" = "v4.0.0-beta.6", "git" = "https://github.com/lance-format/lance.git" }
lance-datagen = { "version" = "=4.0.0-beta.6", "tag" = "v4.0.0-beta.6", "git" = "https://github.com/lance-format/lance.git" }
lance-file = { "version" = "=4.0.0-beta.6", "tag" = "v4.0.0-beta.6", "git" = "https://github.com/lance-format/lance.git" }
lance-io = { "version" = "=4.0.0-beta.6", default-features = false, "tag" = "v4.0.0-beta.6", "git" = "https://github.com/lance-format/lance.git" }
lance-index = { "version" = "=4.0.0-beta.6", "tag" = "v4.0.0-beta.6", "git" = "https://github.com/lance-format/lance.git" }
lance-linalg = { "version" = "=4.0.0-beta.6", "tag" = "v4.0.0-beta.6", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace = { "version" = "=4.0.0-beta.6", "tag" = "v4.0.0-beta.6", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace-impls = { "version" = "=4.0.0-beta.6", default-features = false, "tag" = "v4.0.0-beta.6", "git" = "https://github.com/lance-format/lance.git" }
lance-table = { "version" = "=4.0.0-beta.6", "tag" = "v4.0.0-beta.6", "git" = "https://github.com/lance-format/lance.git" }
lance-testing = { "version" = "=4.0.0-beta.6", "tag" = "v4.0.0-beta.6", "git" = "https://github.com/lance-format/lance.git" }
lance-datafusion = { "version" = "=4.0.0-beta.6", "tag" = "v4.0.0-beta.6", "git" = "https://github.com/lance-format/lance.git" }
lance-encoding = { "version" = "=4.0.0-beta.6", "tag" = "v4.0.0-beta.6", "git" = "https://github.com/lance-format/lance.git" }
lance-arrow = { "version" = "=4.0.0-beta.6", "tag" = "v4.0.0-beta.6", "git" = "https://github.com/lance-format/lance.git" }
ahash = "0.8"
# Note that this one does not include pyarrow
arrow = { version = "57.2", optional = false }

View File

@@ -28,7 +28,7 @@
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<arrow.version>15.0.0</arrow.version>
<lance-core.version>3.1.0-beta.2</lance-core.version>
<lance-core.version>4.0.0-beta.6</lance-core.version>
<spotless.skip>false</spotless.skip>
<spotless.version>2.30.0</spotless.version>
<spotless.java.googlejavaformat.version>1.7</spotless.java.googlejavaformat.version>

View File

@@ -450,31 +450,6 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
},
);
describe("delete", () => {
let tmpDir: tmp.DirResult;
let table: Table;
beforeEach(async () => {
tmpDir = tmp.dirSync({ unsafeCleanup: true });
const conn = await connect(tmpDir.name);
table = await conn.createTable("delete_test", [
{ id: 1, value: "a" },
{ id: 2, value: "b" },
{ id: 3, value: "c" },
{ id: 4, value: "d" },
{ id: 5, value: "e" },
]);
});
afterEach(() => tmpDir.removeCallback());
test("returns num_deleted_rows", async () => {
const result = await table.delete("id > 3");
expect(result.numDeletedRows).toBe(2);
expect(result.version).toBe(2);
expect(await table.countRows()).toBe(3);
});
});
describe("merge insert", () => {
let tmpDir: tmp.DirResult;
let table: Table;
@@ -1722,6 +1697,65 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
expect(results2[0].text).toBe(data[1].text);
});
test("full text search fast search", async () => {
const db = await connect(tmpDir.name);
const data = [{ text: "hello world", vector: [0.1, 0.2, 0.3], id: 1 }];
const table = await db.createTable("test", data);
await table.createIndex("text", {
config: Index.fts(),
});
// Insert unindexed data after creating the index.
await table.add([{ text: "xyz", vector: [0.4, 0.5, 0.6], id: 2 }]);
const withFlatSearch = await table
.search("xyz", "fts")
.limit(10)
.toArray();
expect(withFlatSearch.length).toBeGreaterThan(0);
const fastSearchResults = await table
.search("xyz", "fts")
.fastSearch()
.limit(10)
.toArray();
expect(fastSearchResults.length).toBe(0);
const nearestToTextFastSearch = await table
.query()
.nearestToText("xyz")
.fastSearch()
.limit(10)
.toArray();
expect(nearestToTextFastSearch.length).toBe(0);
// fastSearch should be chainable with other methods.
const chainedFastSearch = await table
.search("xyz", "fts")
.fastSearch()
.select(["text"])
.limit(5)
.toArray();
expect(chainedFastSearch.length).toBe(0);
await table.optimize();
const indexedFastSearch = await table
.search("xyz", "fts")
.fastSearch()
.limit(10)
.toArray();
expect(indexedFastSearch.length).toBeGreaterThan(0);
const indexedNearestToTextFastSearch = await table
.query()
.nearestToText("xyz")
.fastSearch()
.limit(10)
.toArray();
expect(indexedNearestToTextFastSearch.length).toBeGreaterThan(0);
});
test("prewarm full text search index", async () => {
const db = await connect(tmpDir.name);
const data = [

View File

@@ -606,6 +606,7 @@ class LanceQueryBuilder(ABC):
query,
ordering_field_name=ordering_field_name,
fts_columns=fts_columns,
fast_search=fast_search,
)
if isinstance(query, list):
@@ -1456,13 +1457,14 @@ class LanceFtsQueryBuilder(LanceQueryBuilder):
query: str | FullTextQuery,
ordering_field_name: Optional[str] = None,
fts_columns: Optional[Union[str, List[str]]] = None,
fast_search: bool = None,
):
super().__init__(table)
self._query = query
self._phrase_query = False
self.ordering_field_name = ordering_field_name
self._reranker = None
self._fast_search = None
self._fast_search = fast_search
if isinstance(fts_columns, str):
fts_columns = [fts_columns]
self._fts_columns = fts_columns

View File

@@ -218,8 +218,6 @@ class RemoteTable(Table):
train: bool = True,
):
"""Create an index on the table.
Currently, the only parameters that matter are
the metric and the vector column name.
Parameters
----------
@@ -250,11 +248,6 @@ class RemoteTable(Table):
>>> table.create_index("l2", "vector") # doctest: +SKIP
"""
if num_sub_vectors is not None:
logging.warning(
"num_sub_vectors is not supported on LanceDB cloud."
"This parameter will be tuned automatically."
)
if accelerator is not None:
logging.warning(
"GPU accelerator is not yet supported on LanceDB cloud."

View File

@@ -27,6 +27,7 @@ from lancedb.query import (
PhraseQuery,
BooleanQuery,
Occur,
LanceFtsQueryBuilder,
)
import numpy as np
import pyarrow as pa
@@ -920,6 +921,10 @@ def test_fts_fast_search(table):
assert query.limit == 5
assert query.columns == ["text"]
# fast_search should be enabled by keyword argument too
query = LanceFtsQueryBuilder(table, "xyz", fast_search=True).to_query_object()
assert query.fast_search is True
# Verify it executes without error and skips unindexed data
results = table.search("xyz", query_type="fts").fast_search().limit(5).to_list()
assert len(results) == 0

View File

@@ -71,7 +71,7 @@ impl StorageOptionsProvider for PyStorageOptionsProviderWrapper {
"Failed to call fetch_storage_options: {}",
e
))),
location: snafu::location!(),
location: std::panic::Location::caller(),
})?;
// If result is None, return None
@@ -83,7 +83,7 @@ impl StorageOptionsProvider for PyStorageOptionsProviderWrapper {
let result_dict = result.downcast::<PyDict>().map_err(|_| {
lance_core::Error::InvalidInput {
source: "fetch_storage_options() must return None or a dict of string key-value pairs".into(),
location: snafu::location!(),
location: std::panic::Location::caller(),
}
})?;
@@ -93,13 +93,13 @@ impl StorageOptionsProvider for PyStorageOptionsProviderWrapper {
let key_str: String = key.extract().map_err(|e| {
lance_core::Error::InvalidInput {
source: format!("Storage option key must be a string: {}", e).into(),
location: snafu::location!(),
location: std::panic::Location::caller(),
}
})?;
let value_str: String = value.extract().map_err(|e| {
lance_core::Error::InvalidInput {
source: format!("Storage option value must be a string: {}", e).into(),
location: snafu::location!(),
location: std::panic::Location::caller(),
}
})?;
storage_options.insert(key_str, value_str);
@@ -114,7 +114,7 @@ impl StorageOptionsProvider for PyStorageOptionsProviderWrapper {
"Task join error: {}",
e
))),
location: snafu::location!(),
location: std::panic::Location::caller(),
})?
}

View File

@@ -27,7 +27,7 @@
///
/// The btree index does not currently have any parameters though parameters such as the
/// block size may be added in the future.
#[derive(Default, Debug, Clone)]
#[derive(Default, Debug, Clone, serde::Serialize)]
pub struct BTreeIndexBuilder {}
impl BTreeIndexBuilder {}
@@ -39,7 +39,7 @@ impl BTreeIndexBuilder {}
/// This index works best for low-cardinality (i.e., less than 1000 unique values) columns,
/// where the number of unique values is small.
/// The bitmap stores a list of row ids where the value is present.
#[derive(Debug, Clone, Default)]
#[derive(Debug, Clone, Default, serde::Serialize)]
pub struct BitmapIndexBuilder {}
/// Builder for LabelList index.
@@ -48,7 +48,7 @@ pub struct BitmapIndexBuilder {}
/// support queries with `array_contains_all` and `array_contains_any`
/// using an underlying bitmap index.
///
#[derive(Debug, Clone, Default)]
#[derive(Debug, Clone, Default, serde::Serialize)]
pub struct LabelListIndexBuilder {}
pub use lance_index::scalar::inverted::query::*;

View File

@@ -7,6 +7,7 @@
//! Vector indices are only supported on fixed-size-list (tensor) columns of floating point
//! values
use lance::table::format::{IndexMetadata, Manifest};
use serde::Serialize;
use crate::DistanceType;
@@ -181,14 +182,17 @@ macro_rules! impl_hnsw_params_setter {
/// The partitioning process is called IVF and the `num_partitions` parameter controls how many groups to create.
///
/// Note that training an IVF Flat index on a large dataset is a slow operation and currently is also a memory intensive operation.
#[derive(Debug, Clone)]
#[derive(Debug, Clone, Serialize)]
pub struct IvfFlatIndexBuilder {
#[serde(rename = "metric_type")]
pub(crate) distance_type: DistanceType,
// IVF
#[serde(skip_serializing_if = "Option::is_none")]
pub(crate) num_partitions: Option<u32>,
pub(crate) sample_rate: u32,
pub(crate) max_iterations: u32,
#[serde(skip_serializing_if = "Option::is_none")]
pub(crate) target_partition_size: Option<u32>,
}
@@ -213,14 +217,17 @@ impl IvfFlatIndexBuilder {
///
/// This index compresses vectors using scalar quantization and groups them into IVF partitions.
/// It offers a balance between search performance and storage footprint.
#[derive(Debug, Clone)]
#[derive(Debug, Clone, Serialize)]
pub struct IvfSqIndexBuilder {
#[serde(rename = "metric_type")]
pub(crate) distance_type: DistanceType,
// IVF
#[serde(skip_serializing_if = "Option::is_none")]
pub(crate) num_partitions: Option<u32>,
pub(crate) sample_rate: u32,
pub(crate) max_iterations: u32,
#[serde(skip_serializing_if = "Option::is_none")]
pub(crate) target_partition_size: Option<u32>,
}
@@ -261,18 +268,23 @@ impl IvfSqIndexBuilder {
///
/// Note that training an IVF PQ index on a large dataset is a slow operation and
/// currently is also a memory intensive operation.
#[derive(Debug, Clone)]
#[derive(Debug, Clone, Serialize)]
pub struct IvfPqIndexBuilder {
#[serde(rename = "metric_type")]
pub(crate) distance_type: DistanceType,
// IVF
#[serde(skip_serializing_if = "Option::is_none")]
pub(crate) num_partitions: Option<u32>,
pub(crate) sample_rate: u32,
pub(crate) max_iterations: u32,
#[serde(skip_serializing_if = "Option::is_none")]
pub(crate) target_partition_size: Option<u32>,
// PQ
#[serde(skip_serializing_if = "Option::is_none")]
pub(crate) num_sub_vectors: Option<u32>,
#[serde(skip_serializing_if = "Option::is_none")]
pub(crate) num_bits: Option<u32>,
}
@@ -323,14 +335,18 @@ pub(crate) fn suggested_num_sub_vectors(dim: u32) -> u32 {
///
/// Note that training an IVF RQ index on a large dataset is a slow operation and
/// currently is also a memory intensive operation.
#[derive(Debug, Clone)]
#[derive(Debug, Clone, Serialize)]
pub struct IvfRqIndexBuilder {
// IVF
#[serde(rename = "metric_type")]
pub(crate) distance_type: DistanceType,
#[serde(skip_serializing_if = "Option::is_none")]
pub(crate) num_partitions: Option<u32>,
#[serde(skip_serializing_if = "Option::is_none")]
pub(crate) num_bits: Option<u32>,
pub(crate) sample_rate: u32,
pub(crate) max_iterations: u32,
#[serde(skip_serializing_if = "Option::is_none")]
pub(crate) target_partition_size: Option<u32>,
}
@@ -365,13 +381,16 @@ impl IvfRqIndexBuilder {
/// quickly find the closest vectors to a query vector.
///
/// The PQ (product quantizer) is used to compress the vectors in the same way as IVF PQ.
#[derive(Debug, Clone)]
#[derive(Debug, Clone, Serialize)]
pub struct IvfHnswPqIndexBuilder {
// IVF
#[serde(rename = "metric_type")]
pub(crate) distance_type: DistanceType,
#[serde(skip_serializing_if = "Option::is_none")]
pub(crate) num_partitions: Option<u32>,
pub(crate) sample_rate: u32,
pub(crate) max_iterations: u32,
#[serde(skip_serializing_if = "Option::is_none")]
pub(crate) target_partition_size: Option<u32>,
// HNSW
@@ -379,7 +398,9 @@ pub struct IvfHnswPqIndexBuilder {
pub(crate) ef_construction: u32,
// PQ
#[serde(skip_serializing_if = "Option::is_none")]
pub(crate) num_sub_vectors: Option<u32>,
#[serde(skip_serializing_if = "Option::is_none")]
pub(crate) num_bits: Option<u32>,
}
@@ -415,13 +436,16 @@ impl IvfHnswPqIndexBuilder {
///
/// The SQ (scalar quantizer) is used to compress the vectors,
/// each vector is mapped to an 8-bit integer vector, giving a 4x compression ratio for float32 vectors.
#[derive(Debug, Clone)]
#[derive(Debug, Clone, Serialize)]
pub struct IvfHnswSqIndexBuilder {
// IVF
#[serde(rename = "metric_type")]
pub(crate) distance_type: DistanceType,
#[serde(skip_serializing_if = "Option::is_none")]
pub(crate) num_partitions: Option<u32>,
pub(crate) sample_rate: u32,
pub(crate) max_iterations: u32,
#[serde(skip_serializing_if = "Option::is_none")]
pub(crate) target_partition_size: Option<u32>,
// HNSW

View File

@@ -1276,73 +1276,24 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
);
}
match index.index {
// TODO: Should we pass the actual index parameters? SaaS does not
// yet support them.
Index::IvfFlat(index) => {
body[INDEX_TYPE_KEY] = serde_json::Value::String("IVF_FLAT".to_string());
body[METRIC_TYPE_KEY] =
serde_json::Value::String(index.distance_type.to_string().to_lowercase());
if let Some(num_partitions) = index.num_partitions {
body["num_partitions"] = serde_json::Value::Number(num_partitions.into());
}
}
Index::IvfPq(index) => {
body[INDEX_TYPE_KEY] = serde_json::Value::String("IVF_PQ".to_string());
body[METRIC_TYPE_KEY] =
serde_json::Value::String(index.distance_type.to_string().to_lowercase());
if let Some(num_partitions) = index.num_partitions {
body["num_partitions"] = serde_json::Value::Number(num_partitions.into());
}
if let Some(num_bits) = index.num_bits {
body["num_bits"] = serde_json::Value::Number(num_bits.into());
}
}
Index::IvfSq(index) => {
body[INDEX_TYPE_KEY] = serde_json::Value::String("IVF_SQ".to_string());
body[METRIC_TYPE_KEY] =
serde_json::Value::String(index.distance_type.to_string().to_lowercase());
if let Some(num_partitions) = index.num_partitions {
body["num_partitions"] = serde_json::Value::Number(num_partitions.into());
}
}
Index::IvfHnswSq(index) => {
body[INDEX_TYPE_KEY] = serde_json::Value::String("IVF_HNSW_SQ".to_string());
body[METRIC_TYPE_KEY] =
serde_json::Value::String(index.distance_type.to_string().to_lowercase());
if let Some(num_partitions) = index.num_partitions {
body["num_partitions"] = serde_json::Value::Number(num_partitions.into());
}
}
Index::IvfRq(index) => {
body[INDEX_TYPE_KEY] = serde_json::Value::String("IVF_RQ".to_string());
body[METRIC_TYPE_KEY] =
serde_json::Value::String(index.distance_type.to_string().to_lowercase());
if let Some(num_partitions) = index.num_partitions {
body["num_partitions"] = serde_json::Value::Number(num_partitions.into());
}
if let Some(num_bits) = index.num_bits {
body["num_bits"] = serde_json::Value::Number(num_bits.into());
}
}
Index::BTree(_) => {
body[INDEX_TYPE_KEY] = serde_json::Value::String("BTREE".to_string());
}
Index::Bitmap(_) => {
body[INDEX_TYPE_KEY] = serde_json::Value::String("BITMAP".to_string());
}
Index::LabelList(_) => {
body[INDEX_TYPE_KEY] = serde_json::Value::String("LABEL_LIST".to_string());
}
Index::FTS(fts) => {
body[INDEX_TYPE_KEY] = serde_json::Value::String("FTS".to_string());
let params = serde_json::to_value(&fts).map_err(|e| Error::InvalidInput {
message: format!("failed to serialize FTS index params {:?}", e),
})?;
for (key, value) in params.as_object().unwrap() {
body[key] = value.clone();
}
}
fn to_json(params: &impl serde::Serialize) -> crate::Result<serde_json::Value> {
serde_json::to_value(params).map_err(|e| Error::InvalidInput {
message: format!("failed to serialize index params {:?}", e),
})
}
// Map each Index variant to its wire type name and serializable params.
// Auto is special-cased since it needs schema inspection.
let (index_type_str, params) = match &index.index {
Index::IvfFlat(p) => ("IVF_FLAT", Some(to_json(p)?)),
Index::IvfPq(p) => ("IVF_PQ", Some(to_json(p)?)),
Index::IvfSq(p) => ("IVF_SQ", Some(to_json(p)?)),
Index::IvfHnswSq(p) => ("IVF_HNSW_SQ", Some(to_json(p)?)),
Index::IvfRq(p) => ("IVF_RQ", Some(to_json(p)?)),
Index::BTree(p) => ("BTREE", Some(to_json(p)?)),
Index::Bitmap(p) => ("BITMAP", Some(to_json(p)?)),
Index::LabelList(p) => ("LABEL_LIST", Some(to_json(p)?)),
Index::FTS(p) => ("FTS", Some(to_json(p)?)),
Index::Auto => {
let schema = self.schema().await?;
let field = schema
@@ -1351,11 +1302,11 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
message: format!("Column {} not found in schema", column),
})?;
if supported_vector_data_type(field.data_type()) {
body[INDEX_TYPE_KEY] = serde_json::Value::String("IVF_PQ".to_string());
body[METRIC_TYPE_KEY] =
serde_json::Value::String(DistanceType::L2.to_string().to_lowercase());
("IVF_PQ", None)
} else if supported_btree_data_type(field.data_type()) {
body[INDEX_TYPE_KEY] = serde_json::Value::String("BTREE".to_string());
("BTREE", None)
} else {
return Err(Error::NotSupported {
message: format!(
@@ -1373,6 +1324,13 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
}
};
body[INDEX_TYPE_KEY] = index_type_str.into();
if let Some(params) = params {
for (key, value) in params.as_object().expect("params should be a JSON object") {
body[key] = value.clone();
}
}
let request = request.json(&body);
let (request_id, response) = self.send(request, true).await?;
@@ -1833,7 +1791,9 @@ mod tests {
use rstest::rstest;
use serde_json::json;
use crate::index::vector::{IvfFlatIndexBuilder, IvfHnswSqIndexBuilder};
use crate::index::vector::{
IvfFlatIndexBuilder, IvfHnswSqIndexBuilder, IvfRqIndexBuilder, IvfSqIndexBuilder,
};
use crate::remote::db::DEFAULT_SERVER_VERSION;
use crate::remote::JSON_CONTENT_TYPE;
use crate::utils::background_cache::clock;
@@ -2995,6 +2955,8 @@ mod tests {
"IVF_FLAT",
json!({
"metric_type": "hamming",
"sample_rate": 256,
"max_iterations": 50,
}),
Index::IvfFlat(IvfFlatIndexBuilder::default().distance_type(DistanceType::Hamming)),
),
@@ -3003,6 +2965,8 @@ mod tests {
json!({
"metric_type": "hamming",
"num_partitions": 128,
"sample_rate": 256,
"max_iterations": 50,
}),
Index::IvfFlat(
IvfFlatIndexBuilder::default()
@@ -3014,6 +2978,8 @@ mod tests {
"IVF_PQ",
json!({
"metric_type": "l2",
"sample_rate": 256,
"max_iterations": 50,
}),
Index::IvfPq(Default::default()),
),
@@ -3023,6 +2989,8 @@ mod tests {
"metric_type": "cosine",
"num_partitions": 128,
"num_bits": 4,
"sample_rate": 256,
"max_iterations": 50,
}),
Index::IvfPq(
IvfPqIndexBuilder::default()
@@ -3031,10 +2999,29 @@ mod tests {
.num_bits(4),
),
),
(
"IVF_PQ",
json!({
"metric_type": "l2",
"num_sub_vectors": 16,
"sample_rate": 512,
"max_iterations": 100,
}),
Index::IvfPq(
IvfPqIndexBuilder::default()
.num_sub_vectors(16)
.sample_rate(512)
.max_iterations(100),
),
),
(
"IVF_HNSW_SQ",
json!({
"metric_type": "l2",
"sample_rate": 256,
"max_iterations": 50,
"m": 20,
"ef_construction": 300,
}),
Index::IvfHnswSq(Default::default()),
),
@@ -3043,11 +3030,65 @@ mod tests {
json!({
"metric_type": "l2",
"num_partitions": 128,
"sample_rate": 256,
"max_iterations": 50,
"m": 40,
"ef_construction": 500,
}),
Index::IvfHnswSq(
IvfHnswSqIndexBuilder::default()
.distance_type(DistanceType::L2)
.num_partitions(128),
.num_partitions(128)
.num_edges(40)
.ef_construction(500),
),
),
(
"IVF_SQ",
json!({
"metric_type": "l2",
"sample_rate": 256,
"max_iterations": 50,
}),
Index::IvfSq(Default::default()),
),
(
"IVF_SQ",
json!({
"metric_type": "cosine",
"num_partitions": 64,
"sample_rate": 256,
"max_iterations": 50,
}),
Index::IvfSq(
IvfSqIndexBuilder::default()
.distance_type(DistanceType::Cosine)
.num_partitions(64),
),
),
(
"IVF_RQ",
json!({
"metric_type": "l2",
"sample_rate": 256,
"max_iterations": 50,
}),
Index::IvfRq(Default::default()),
),
(
"IVF_RQ",
json!({
"metric_type": "cosine",
"num_partitions": 64,
"num_bits": 8,
"sample_rate": 256,
"max_iterations": 50,
}),
Index::IvfRq(
IvfRqIndexBuilder::default()
.distance_type(DistanceType::Cosine)
.num_partitions(64)
.num_bits(8),
),
),
// HNSW_PQ isn't yet supported on SaaS