feat: add analyze_plan api (#2280)

Add an analyze_plan API that executes the query and returns runtime
metrics.
This helps identify query I/O overhead and diagnose query
slowness.
Author: LuQQiu
Date: 2025-03-28 14:28:52 -07:00
Committed by: GitHub
Parent: a547c523c2
Commit: a1d1833a40
18 changed files with 538 additions and 45 deletions

Cargo.lock (generated)

@@ -390,9 +390,9 @@ dependencies = [
[[package]]
name = "async-compression"
version = "0.4.21"
version = "0.4.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c0cf008e5e1a9e9e22a7d3c9a4992e21a350290069e36d8fb72304ed17e8f2d2"
checksum = "59a194f9d963d8099596278594b3107448656ba73831c9d8c783e613ce86da64"
dependencies = [
"flate2",
"futures-core",
@@ -512,9 +512,9 @@ checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26"
[[package]]
name = "aws-config"
version = "1.6.0"
version = "1.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6a84fe2c5e9965fba0fbc2001db252f1d57527d82a905cca85127df227bca748"
checksum = "8c39646d1a6b51240a1a23bb57ea4eebede7e16fbc237fdc876980233dcecb4f"
dependencies = [
"aws-credential-types",
"aws-runtime",
@@ -603,9 +603,9 @@ dependencies = [
[[package]]
name = "aws-sdk-bedrockruntime"
version = "1.77.0"
version = "1.78.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4198493316dab97e1fed7716f3823462b73a34c518f4ee7b9799921645e232e5"
checksum = "6457fd617f20075dd2d5c9f6c3cb09815e6b83ab61c55499055499da9e04478d"
dependencies = [
"aws-credential-types",
"aws-runtime",
@@ -627,9 +627,9 @@ dependencies = [
[[package]]
name = "aws-sdk-dynamodb"
version = "1.69.0"
version = "1.70.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c42f454f50a050aaa3f3d200a3ac072e48c18c4bb5356c38be7eee1da1439a43"
checksum = "4ac281113af7f8700394bf25eb272b842b7ca088810e96c928f812282f2e6f44"
dependencies = [
"aws-credential-types",
"aws-runtime",
@@ -650,9 +650,9 @@ dependencies = [
[[package]]
name = "aws-sdk-kms"
version = "1.63.0"
version = "1.64.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a971bfe62ca4a228627a1b74a87a7a142979b20b168d2e2884f4893212ebb715"
checksum = "c23289881f4071421bbef3688ca43501c4fede796e0cbca942a54f0eb6906fbc"
dependencies = [
"aws-credential-types",
"aws-runtime",
@@ -664,6 +664,7 @@ dependencies = [
"aws-smithy-types",
"aws-types",
"bytes",
"fastrand",
"http 0.2.12",
"once_cell",
"regex-lite",
@@ -672,9 +673,9 @@ dependencies = [
[[package]]
name = "aws-sdk-s3"
version = "1.79.0"
version = "1.80.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a8f63ba8f5fca32061c7d62d866ef65470edde38d4c5f8a0ebb8ff40a0521e1c"
checksum = "3a36b09e8273d89c4f35ea122b83b30e48f906f3b644460d72a7d3656d1be93d"
dependencies = [
"aws-credential-types",
"aws-runtime",
@@ -707,9 +708,9 @@ dependencies = [
[[package]]
name = "aws-sdk-sso"
version = "1.62.0"
version = "1.64.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1d5330ad4e8a1ff49e9f26b738611caa72b105c41d41733801d1a36e8f9de936"
checksum = "02d4bdb0e5f80f0689e61c77ab678b2b9304af329616af38aef5b6b967b8e736"
dependencies = [
"aws-credential-types",
"aws-runtime",
@@ -721,6 +722,7 @@ dependencies = [
"aws-smithy-types",
"aws-types",
"bytes",
"fastrand",
"http 0.2.12",
"once_cell",
"regex-lite",
@@ -729,9 +731,9 @@ dependencies = [
[[package]]
name = "aws-sdk-ssooidc"
version = "1.63.0"
version = "1.65.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7956b1a85d49082347a7d17daa2e32df191f3e23c03d47294b99f95413026a78"
checksum = "acbbb3ce8da257aedbccdcb1aadafbbb6a5fe9adf445db0e1ea897bdc7e22d08"
dependencies = [
"aws-credential-types",
"aws-runtime",
@@ -743,6 +745,7 @@ dependencies = [
"aws-smithy-types",
"aws-types",
"bytes",
"fastrand",
"http 0.2.12",
"once_cell",
"regex-lite",
@@ -751,9 +754,9 @@ dependencies = [
[[package]]
name = "aws-sdk-sts"
version = "1.63.0"
version = "1.65.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "065c533fbe6f84962af33fcf02b0350b7c1f79285baab5924615d2be3b232855"
checksum = "96a78a8f50a1630db757b60f679c8226a8a70ee2ab5f5e6e51dc67f6c61c7cfd"
dependencies = [
"aws-credential-types",
"aws-runtime",
@@ -766,6 +769,7 @@ dependencies = [
"aws-smithy-types",
"aws-smithy-xml",
"aws-types",
"fastrand",
"http 0.2.12",
"once_cell",
"regex-lite",
@@ -869,9 +873,9 @@ dependencies = [
[[package]]
name = "aws-smithy-http-client"
version = "1.0.0"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0497ef5d53065b7cd6a35e9c1654bd1fefeae5c52900d91d1b188b0af0f29324"
checksum = "8aff1159006441d02e57204bf57a1b890ba68bedb6904ffd2873c1c4c11c546b"
dependencies = [
"aws-smithy-async",
"aws-smithy-runtime-api",
@@ -904,6 +908,16 @@ dependencies = [
"aws-smithy-types",
]
[[package]]
name = "aws-smithy-observability"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "445d065e76bc1ef54963db400319f1dd3ebb3e0a74af20f7f7630625b0cc7cc0"
dependencies = [
"aws-smithy-runtime-api",
"once_cell",
]
[[package]]
name = "aws-smithy-query"
version = "0.60.7"
@@ -916,13 +930,14 @@ dependencies = [
[[package]]
name = "aws-smithy-runtime"
version = "1.8.0"
version = "1.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f6328865e36c6fd970094ead6b05efd047d3a80ec5fc3be5e743910da9f2ebf8"
checksum = "0152749e17ce4d1b47c7747bdfec09dac1ccafdcbc741ebf9daa2a373356730f"
dependencies = [
"aws-smithy-async",
"aws-smithy-http",
"aws-smithy-http-client",
"aws-smithy-observability",
"aws-smithy-runtime-api",
"aws-smithy-types",
"bytes",
@@ -2537,9 +2552,9 @@ dependencies = [
[[package]]
name = "event-listener-strategy"
version = "0.5.3"
version = "0.5.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3c3e4e0dd3673c1139bf041f3008816d9cf2946bbfac2945c09e523b8d7b05b2"
checksum = "8be9f3dfaaffdae2972880079a491a1a8bb7cbed0b8dd7a347f668b4150a3b93"
dependencies = [
"event-listener 5.4.0",
"pin-project-lite",
@@ -3367,9 +3382,9 @@ dependencies = [
[[package]]
name = "icu_locid_transform_data"
version = "1.5.0"
version = "1.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fdc8ff3388f852bede6b579ad4e978ab004f139284d7b28715f773507b946f6e"
checksum = "7515e6d781098bf9f7205ab3fc7e9709d34554ae0b21ddbcb5febfa4bc7df11d"
[[package]]
name = "icu_normalizer"
@@ -3391,9 +3406,9 @@ dependencies = [
[[package]]
name = "icu_normalizer_data"
version = "1.5.0"
version = "1.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f8cafbf7aa791e9b22bec55a167906f9e1215fd475cd22adfcf660e03e989516"
checksum = "c5e8338228bdc8ab83303f16b797e177953730f601a96c25d10cb3ab0daa0cb7"
[[package]]
name = "icu_properties"
@@ -3412,9 +3427,9 @@ dependencies = [
[[package]]
name = "icu_properties_data"
version = "1.5.0"
version = "1.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "67a8effbc3dd3e4ba1afa8ad918d5684b8868b3b26500753effea8d2eed19569"
checksum = "85fb8799753b75aee8d2a21d7c14d9f38921b54b3dbda10f5a3c7a7b82dba5e2"
[[package]]
name = "icu_provider"
@@ -4035,7 +4050,7 @@ dependencies = [
[[package]]
name = "lancedb"
version = "0.18.2-beta.0"
version = "0.18.3-beta.0"
dependencies = [
"arrow",
"arrow-array",
@@ -4122,7 +4137,7 @@ dependencies = [
[[package]]
name = "lancedb-node"
version = "0.18.2-beta.0"
version = "0.18.3-beta.0"
dependencies = [
"arrow-array",
"arrow-ipc",
@@ -4147,7 +4162,7 @@ dependencies = [
[[package]]
name = "lancedb-nodejs"
version = "0.18.2-beta.0"
version = "0.18.3-beta.0"
dependencies = [
"arrow-array",
"arrow-ipc",
@@ -4165,7 +4180,7 @@ dependencies = [
[[package]]
name = "lancedb-python"
version = "0.21.2-beta.0"
version = "0.21.3-beta.0"
dependencies = [
"arrow",
"env_logger",
@@ -4902,9 +4917,9 @@ dependencies = [
[[package]]
name = "once_cell"
version = "1.21.1"
version = "1.21.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d75b0bedcc4fe52caa0e03d9f1151a323e4aa5e2d78ba3580400cd3c9e2bc4bc"
checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d"
[[package]]
name = "oneshot"
@@ -5727,9 +5742,9 @@ dependencies = [
[[package]]
name = "quick-xml"
version = "0.37.2"
version = "0.37.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "165859e9e55f79d67b96c5d96f4e88b6f2695a1972849c15a6a3f5c59fc2c003"
checksum = "bf763ab1c7a3aa408be466efc86efe35ed1bd3dd74173ed39d6b0d0a6f0ba148"
dependencies = [
"memchr",
"serde",
@@ -5777,9 +5792,9 @@ dependencies = [
[[package]]
name = "quinn-udp"
version = "0.5.10"
version = "0.5.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e46f3055866785f6b92bc6164b76be02ca8f2eb4b002c0354b28cf4c119e5944"
checksum = "541d0f57c6ec747a90738a52741d3221f7960e8ac2f0ff4b1a63680e033b4ab5"
dependencies = [
"cfg_aliases",
"libc",
@@ -6269,7 +6284,7 @@ dependencies = [
"once_cell",
"ring",
"rustls-pki-types",
"rustls-webpki 0.103.0",
"rustls-webpki 0.103.1",
"subtle",
"zeroize",
]
@@ -6337,9 +6352,9 @@ dependencies = [
[[package]]
name = "rustls-webpki"
version = "0.103.0"
version = "0.103.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0aa4eeac2588ffff23e9d7a7e9b3f971c5fb5b7ebc9452745e0c232c64f83b2f"
checksum = "fef8b8769aaccf73098557a87cd1816b4f9c7c16811c9c77142aa695c16f2c03"
dependencies = [
"aws-lc-rs",
"ring",


@@ -30,6 +30,53 @@ protected inner: Query | Promise<Query>;
## Methods
### analyzePlan()
```ts
analyzePlan(): Promise<string>
```
Executes the query and returns the physical query plan annotated with runtime metrics.
This is useful for debugging and performance analysis, as it shows how the query was executed
and includes metrics such as elapsed time, rows processed, and I/O statistics.
#### Returns
`Promise`&lt;`string`&gt;
A query execution plan with runtime metrics for each step.
#### Example
```ts
import * as lancedb from "@lancedb/lancedb"
const db = await lancedb.connect("./.lancedb");
const table = await db.createTable("my_table", [
{ vector: [1.1, 0.9], id: "1" },
]);
const plan = await table.query().nearestTo([0.5, 0.2]).analyzePlan();
```

Example output (with runtime metrics inlined):

```
AnalyzeExec verbose=true, metrics=[]
  ProjectionExec: expr=[id@3 as id, vector@0 as vector, _distance@2 as _distance], metrics=[output_rows=1, elapsed_compute=3.292µs]
    Take: columns="vector, _rowid, _distance, (id)", metrics=[output_rows=1, elapsed_compute=66.001µs, batches_processed=1, bytes_read=8, iops=1, requests=1]
      CoalesceBatchesExec: target_batch_size=1024, metrics=[output_rows=1, elapsed_compute=3.333µs]
        GlobalLimitExec: skip=0, fetch=10, metrics=[output_rows=1, elapsed_compute=167ns]
          FilterExec: _distance@2 IS NOT NULL, metrics=[output_rows=1, elapsed_compute=8.542µs]
            SortExec: TopK(fetch=10), expr=[_distance@2 ASC NULLS LAST], metrics=[output_rows=1, elapsed_compute=63.25µs, row_replacements=1]
              KNNVectorDistance: metric=l2, metrics=[output_rows=1, elapsed_compute=114.333µs, output_batches=1]
                LanceScan: uri=/path/to/data, projection=[vector], row_id=true, row_addr=false, ordered=false, metrics=[output_rows=1, elapsed_compute=103.626µs, bytes_read=549, iops=2, requests=2]
```
#### Inherited from
[`QueryBase`](QueryBase.md).[`analyzePlan`](QueryBase.md#analyzeplan)
***
### execute()
```ts


@@ -36,6 +36,49 @@ protected inner: NativeQueryType | Promise<NativeQueryType>;
## Methods
### analyzePlan()
```ts
analyzePlan(): Promise<string>
```
Executes the query and returns the physical query plan annotated with runtime metrics.
This is useful for debugging and performance analysis, as it shows how the query was executed
and includes metrics such as elapsed time, rows processed, and I/O statistics.
#### Returns
`Promise`&lt;`string`&gt;
A query execution plan with runtime metrics for each step.
#### Example
```ts
import * as lancedb from "@lancedb/lancedb"
const db = await lancedb.connect("./.lancedb");
const table = await db.createTable("my_table", [
{ vector: [1.1, 0.9], id: "1" },
]);
const plan = await table.query().nearestTo([0.5, 0.2]).analyzePlan();
```

Example output (with runtime metrics inlined):

```
AnalyzeExec verbose=true, metrics=[]
  ProjectionExec: expr=[id@3 as id, vector@0 as vector, _distance@2 as _distance], metrics=[output_rows=1, elapsed_compute=3.292µs]
    Take: columns="vector, _rowid, _distance, (id)", metrics=[output_rows=1, elapsed_compute=66.001µs, batches_processed=1, bytes_read=8, iops=1, requests=1]
      CoalesceBatchesExec: target_batch_size=1024, metrics=[output_rows=1, elapsed_compute=3.333µs]
        GlobalLimitExec: skip=0, fetch=10, metrics=[output_rows=1, elapsed_compute=167ns]
          FilterExec: _distance@2 IS NOT NULL, metrics=[output_rows=1, elapsed_compute=8.542µs]
            SortExec: TopK(fetch=10), expr=[_distance@2 ASC NULLS LAST], metrics=[output_rows=1, elapsed_compute=63.25µs, row_replacements=1]
              KNNVectorDistance: metric=l2, metrics=[output_rows=1, elapsed_compute=114.333µs, output_batches=1]
                LanceScan: uri=/path/to/data, projection=[vector], row_id=true, row_addr=false, ordered=false, metrics=[output_rows=1, elapsed_compute=103.626µs, bytes_read=549, iops=2, requests=2]
```
***
### execute()
```ts


@@ -48,6 +48,53 @@ addQueryVector(vector): VectorQuery
***
### analyzePlan()
```ts
analyzePlan(): Promise<string>
```
Executes the query and returns the physical query plan annotated with runtime metrics.
This is useful for debugging and performance analysis, as it shows how the query was executed
and includes metrics such as elapsed time, rows processed, and I/O statistics.
#### Returns
`Promise`&lt;`string`&gt;
A query execution plan with runtime metrics for each step.
#### Example
```ts
import * as lancedb from "@lancedb/lancedb"
const db = await lancedb.connect("./.lancedb");
const table = await db.createTable("my_table", [
{ vector: [1.1, 0.9], id: "1" },
]);
const plan = await table.query().nearestTo([0.5, 0.2]).analyzePlan();
```

Example output (with runtime metrics inlined):

```
AnalyzeExec verbose=true, metrics=[]
  ProjectionExec: expr=[id@3 as id, vector@0 as vector, _distance@2 as _distance], metrics=[output_rows=1, elapsed_compute=3.292µs]
    Take: columns="vector, _rowid, _distance, (id)", metrics=[output_rows=1, elapsed_compute=66.001µs, batches_processed=1, bytes_read=8, iops=1, requests=1]
      CoalesceBatchesExec: target_batch_size=1024, metrics=[output_rows=1, elapsed_compute=3.333µs]
        GlobalLimitExec: skip=0, fetch=10, metrics=[output_rows=1, elapsed_compute=167ns]
          FilterExec: _distance@2 IS NOT NULL, metrics=[output_rows=1, elapsed_compute=8.542µs]
            SortExec: TopK(fetch=10), expr=[_distance@2 ASC NULLS LAST], metrics=[output_rows=1, elapsed_compute=63.25µs, row_replacements=1]
              KNNVectorDistance: metric=l2, metrics=[output_rows=1, elapsed_compute=114.333µs, output_batches=1]
                LanceScan: uri=/path/to/data, projection=[vector], row_id=true, row_addr=false, ordered=false, metrics=[output_rows=1, elapsed_compute=103.626µs, bytes_read=549, iops=2, requests=2]
```
#### Inherited from
[`QueryBase`](QueryBase.md).[`analyzePlan`](QueryBase.md#analyzeplan)
***
### bypassVectorIndex()
```ts


@@ -36,3 +36,9 @@ print the resolved query plan. You can use the `explain_plan` method to do this:
* Python Sync: [LanceQueryBuilder.explain_plan][lancedb.query.LanceQueryBuilder.explain_plan]
* Python Async: [AsyncQueryBase.explain_plan][lancedb.query.AsyncQueryBase.explain_plan]
* Node @lancedb/lancedb: [LanceQueryBuilder.explainPlan](/lancedb/js/classes/QueryBase/#explainplan)
To understand how a query was actually executed, including metrics like execution time, number of rows processed, and I/O stats, use the `analyze_plan` method. It executes the query and returns the physical execution plan annotated with runtime metrics, which makes it especially helpful for performance tuning and debugging (see the sketch after the list below):
* Python Sync: [LanceQueryBuilder.analyze_plan][lancedb.query.LanceQueryBuilder.analyze_plan]
* Python Async: [AsyncQueryBase.analyze_plan][lancedb.query.AsyncQueryBase.analyze_plan]
* Node @lancedb/lancedb: [LanceQueryBuilder.analyzePlan](/lancedb/js/classes/QueryBase/#analyzeplan)
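For example, a minimal Python sketch (assuming a local database at `./.lancedb` and an existing table named `my_table` with a `vector` column; the names are illustrative, not taken from this change):

```python
import lancedb

# Connect to a local database and open an existing table.
db = lancedb.connect("./.lancedb")
table = db.open_table("my_table")

# explain_plan only shows the resolved plan; analyze_plan actually runs the
# query and annotates every step with runtime metrics such as
# elapsed_compute, bytes_read, iops, and output_rows.
plan = table.search([0.5, 0.2]).analyze_plan()
print(plan)
```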


@@ -633,6 +633,23 @@ describe("When creating an index", () => {
expect(plan2).not.toMatch("LanceScan");
});
it("should be able to run analyze plan", async () => {
await tbl.createIndex("vec");
await tbl.add([
{
id: 300,
vec: Array(32)
.fill(1)
.map(() => Math.random()),
tags: [],
},
]);
const plan = await tbl.query().nearestTo(queryVec).analyzePlan();
expect(plan).toMatch("AnalyzeExec");
expect(plan).toMatch("metrics=");
});
it("should be able to query with row id", async () => {
const results = await tbl
.query()
@@ -1346,6 +1363,30 @@ describe("when calling explainPlan", () => {
});
});
describe("when calling analyzePlan", () => {
let tmpDir: tmp.DirResult;
let table: Table;
let queryVec: number[];
beforeEach(async () => {
tmpDir = tmp.dirSync({ unsafeCleanup: true });
const con = await connect(tmpDir.name);
table = await con.createTable("vectors", [{ id: 1, vector: [1.1, 0.9] }]);
});
afterEach(() => {
tmpDir.removeCallback();
});
it("retrieves runtime metrics", async () => {
queryVec = Array(2)
.fill(1)
.map(() => Math.random());
const plan = await table.query().nearestTo(queryVec).analyzePlan();
console.log("Query Plan:\n", plan); // <--- Print the plan
expect(plan).toMatch("AnalyzeExec");
});
});
describe("column name options", () => {
let tmpDir: tmp.DirResult;
let table: Table;


@@ -348,6 +348,43 @@ export class QueryBase<NativeQueryType extends NativeQuery | NativeVectorQuery>
return this.inner.explainPlan(verbose);
}
}
/**
* Executes the query and returns the physical query plan annotated with runtime metrics.
*
* This is useful for debugging and performance analysis, as it shows how the query was executed
* and includes metrics such as elapsed time, rows processed, and I/O statistics.
*
* @example
* import * as lancedb from "@lancedb/lancedb"
*
* const db = await lancedb.connect("./.lancedb");
* const table = await db.createTable("my_table", [
* { vector: [1.1, 0.9], id: "1" },
* ]);
*
* const plan = await table.query().nearestTo([0.5, 0.2]).analyzePlan();
*
* Example output (with runtime metrics inlined):
* AnalyzeExec verbose=true, metrics=[]
* ProjectionExec: expr=[id@3 as id, vector@0 as vector, _distance@2 as _distance], metrics=[output_rows=1, elapsed_compute=3.292µs]
* Take: columns="vector, _rowid, _distance, (id)", metrics=[output_rows=1, elapsed_compute=66.001µs, batches_processed=1, bytes_read=8, iops=1, requests=1]
* CoalesceBatchesExec: target_batch_size=1024, metrics=[output_rows=1, elapsed_compute=3.333µs]
* GlobalLimitExec: skip=0, fetch=10, metrics=[output_rows=1, elapsed_compute=167ns]
* FilterExec: _distance@2 IS NOT NULL, metrics=[output_rows=1, elapsed_compute=8.542µs]
* SortExec: TopK(fetch=10), expr=[_distance@2 ASC NULLS LAST], metrics=[output_rows=1, elapsed_compute=63.25µs, row_replacements=1]
* KNNVectorDistance: metric=l2, metrics=[output_rows=1, elapsed_compute=114.333µs, output_batches=1]
* LanceScan: uri=/path/to/data, projection=[vector], row_id=true, row_addr=false, ordered=false, metrics=[output_rows=1, elapsed_compute=103.626µs, bytes_read=549, iops=2, requests=2]
*
* @returns A query execution plan with runtime metrics for each step.
*/
async analyzePlan(): Promise<string> {
if (this.inner instanceof Promise) {
return this.inner.then((inner) => inner.analyzePlan());
} else {
return this.inner.analyzePlan();
}
}
}
/**


@@ -114,6 +114,16 @@ impl Query {
))
})
}
#[napi(catch_unwind)]
pub async fn analyze_plan(&self) -> napi::Result<String> {
self.inner.analyze_plan().await.map_err(|e| {
napi::Error::from_reason(format!(
"Failed to execute analyze plan: {}",
convert_error(&e)
))
})
}
}
#[napi]
@@ -259,4 +269,14 @@ impl VectorQuery {
))
})
}
#[napi(catch_unwind)]
pub async fn analyze_plan(&self) -> napi::Result<String> {
self.inner.analyze_plan().await.map_err(|e| {
napi::Error::from_reason(format!(
"Failed to execute analyze plan: {}",
convert_error(&e)
))
})
}
}


@@ -96,6 +96,7 @@ class Query:
def nearest_to_text(self, query: dict) -> FTSQuery: ...
async def execute(self, max_batch_length: Optional[int]) -> RecordBatchStream: ...
async def explain_plan(self, verbose: Optional[bool]) -> str: ...
async def analyze_plan(self) -> str: ...
def to_query_request(self) -> PyQueryRequest: ...
class FTSQuery:


@@ -659,6 +659,44 @@ class LanceQueryBuilder(ABC):
""" # noqa: E501
return self._table._explain_plan(self.to_query_object(), verbose=verbose)
def analyze_plan(self) -> str:
"""
Run the query and return its execution plan with runtime metrics.
This returns detailed metrics for each step, such as elapsed time,
rows processed, bytes read, and I/O stats. It is useful for debugging
and performance tuning.
Examples
--------
>>> import lancedb
>>> db = lancedb.connect("./.lancedb")
>>> table = db.create_table("my_table", [{"vector": [99.0, 99]}])
>>> query = [100, 100]
>>> plan = table.search(query).analyze_plan()
>>> print(plan) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
AnalyzeExec verbose=true, metrics=[]
ProjectionExec: expr=[...], metrics=[...]
GlobalLimitExec: skip=0, fetch=10, metrics=[...]
FilterExec: _distance@2 IS NOT NULL,
metrics=[output_rows=..., elapsed_compute=...]
SortExec: TopK(fetch=10), expr=[...],
preserve_partitioning=[...],
metrics=[output_rows=..., elapsed_compute=..., row_replacements=...]
KNNVectorDistance: metric=l2,
metrics=[output_rows=..., elapsed_compute=..., output_batches=...]
LanceScan: uri=..., projection=[vector], row_id=true,
row_addr=false, ordered=false,
metrics=[output_rows=..., elapsed_compute=...,
bytes_read=..., iops=..., requests=...]
Returns
-------
plan : str
The physical query execution plan with runtime metrics.
"""
return self._table._analyze_plan(self.to_query_object())
def vector(self, vector: Union[np.ndarray, list]) -> Self:
"""Set the vector to search for.
@@ -1941,6 +1979,15 @@ class AsyncQueryBase(object):
""" # noqa: E501
return await self._inner.explain_plan(verbose)
async def analyze_plan(self):
"""Execute the query and display with runtime metrics.
Returns
-------
plan : str
"""
return await self._inner.analyze_plan()
class AsyncQuery(AsyncQueryBase):
def __init__(self, inner: LanceQuery):
@@ -2510,7 +2557,7 @@ class AsyncHybridQuery(AsyncQueryBase, AsyncVectorQueryBase):
Returns
-------
plan
plan : str
""" # noqa: E501
results = ["Vector Search Plan:"]
@@ -2519,3 +2566,23 @@ class AsyncHybridQuery(AsyncQueryBase, AsyncVectorQueryBase):
results.append(await self._inner.to_fts_query().explain_plan(verbose))
return "\n".join(results)
async def analyze_plan(self):
"""
Execute the query and return the physical execution plan with runtime metrics.
This runs both the vector and FTS (full-text search) queries and returns
detailed metrics for each step of execution, such as rows processed,
elapsed time, and I/O stats. It is useful for debugging and
performance analysis.
Returns
-------
plan : str
"""
results = ["Vector Search Query:"]
results.append(await self._inner.to_vector_query().analyze_plan())
results.append("FTS Search Query:")
results.append(await self._inner.to_fts_query().analyze_plan())
return "\n".join(results)


@@ -371,6 +371,9 @@ class RemoteTable(Table):
def _explain_plan(self, query: Query, verbose: Optional[bool] = False) -> str:
return LOOP.run(self._table._explain_plan(query, verbose))
def _analyze_plan(self, query: Query) -> str:
return LOOP.run(self._table._analyze_plan(query))
def merge_insert(self, on: Union[str, Iterable[str]]) -> LanceMergeInsertBuilder:
"""Returns a [`LanceMergeInsertBuilder`][lancedb.merge.LanceMergeInsertBuilder]
that can be used to create a "merge insert" operation.


@@ -1010,6 +1010,9 @@ class Table(ABC):
@abstractmethod
def _explain_plan(self, query: Query, verbose: Optional[bool] = False) -> str: ...
@abstractmethod
def _analyze_plan(self, query: Query) -> str: ...
@abstractmethod
def _do_merge(
self,
@@ -2318,6 +2321,9 @@ class LanceTable(Table):
def _explain_plan(self, query: Query, verbose: Optional[bool] = False) -> str:
return LOOP.run(self._table._explain_plan(query, verbose))
def _analyze_plan(self, query: Query) -> str:
return LOOP.run(self._table._analyze_plan(query))
def _do_merge(
self,
merge: LanceMergeInsertBuilder,
@@ -3388,6 +3394,11 @@ class AsyncTable:
async_query = self._sync_query_to_async(query)
return await async_query.explain_plan(verbose)
async def _analyze_plan(self, query: Query) -> str:
# This method is used by the sync table
async_query = self._sync_query_to_async(query)
return await async_query.analyze_plan()
async def _do_merge(
self,
merge: LanceMergeInsertBuilder,


@@ -114,6 +114,16 @@ async def test_explain_plan(table: AsyncTable):
assert "LanceScan" in plan
@pytest.mark.asyncio
async def test_analyze_plan(table: AsyncTable):
res = await (
table.query().nearest_to_text("dog").nearest_to([0.1, 0.1]).analyze_plan()
)
assert "AnalyzeExec" in res
assert "metrics=" in res
def test_normalize_scores():
cases = [
(pa.array([0.1, 0.4]), pa.array([0.0, 1.0])),


@@ -702,6 +702,20 @@ async def test_fast_search_async(tmp_path):
assert "LanceScan" not in plan
def test_analyze_plan(table):
q = LanceVectorQueryBuilder(table, [0, 0], "vector")
res = q.analyze_plan()
assert "AnalyzeExec" in res
assert "metrics=" in res
@pytest.mark.asyncio
async def test_analyze_plan_async(table_async: AsyncTable):
res = await table_async.query().nearest_to(pa.array([1, 2])).analyze_plan()
assert "AnalyzeExec" in res
assert "metrics=" in res
def test_explain_plan(table):
q = LanceVectorQueryBuilder(table, [0, 0], "vector")
plan = q.explain_plan(verbose=True)


@@ -281,6 +281,16 @@ impl Query {
})
}
pub fn analyze_plan(self_: PyRef<'_, Self>) -> PyResult<Bound<'_, PyAny>> {
let inner = self_.inner.clone();
future_into_py(self_.py(), async move {
inner
.analyze_plan()
.await
.map_err(|e| PyRuntimeError::new_err(e.to_string()))
})
}
pub fn to_query_request(&self) -> PyQueryRequest {
PyQueryRequest::from(AnyQuery::Query(self.inner.clone().into_request()))
}
@@ -365,6 +375,16 @@ impl FTSQuery {
})
}
pub fn analyze_plan(self_: PyRef<'_, Self>) -> PyResult<Bound<'_, PyAny>> {
let inner = self_.inner.clone();
future_into_py(self_.py(), async move {
inner
.analyze_plan()
.await
.map_err(|e| PyRuntimeError::new_err(e.to_string()))
})
}
pub fn get_query(&self) -> String {
self.fts_query.query.clone()
}
@@ -480,6 +500,16 @@ impl VectorQuery {
})
}
pub fn analyze_plan(self_: PyRef<'_, Self>) -> PyResult<Bound<'_, PyAny>> {
let inner = self_.inner.clone();
future_into_py(self_.py(), async move {
inner
.analyze_plan()
.await
.map_err(|e| PyRuntimeError::new_err(e.to_string()))
})
}
pub fn nearest_to_text(&mut self, query: Bound<'_, PyDict>) -> PyResult<HybridQuery> {
let base_query = self.inner.clone().into_plain();
let fts_query = Query::new(base_query).nearest_to_text(query)?;


@@ -579,6 +579,15 @@ pub trait ExecutableQuery {
) -> impl Future<Output = Result<SendableRecordBatchStream>> + Send;
fn explain_plan(&self, verbose: bool) -> impl Future<Output = Result<String>> + Send;
fn analyze_plan(&self) -> impl Future<Output = Result<String>> + Send {
self.analyze_plan_with_options(QueryExecutionOptions::default())
}
fn analyze_plan_with_options(
&self,
options: QueryExecutionOptions,
) -> impl Future<Output = Result<String>> + Send;
}
/// A query filter that can be applied to a query
@@ -765,6 +774,11 @@ impl ExecutableQuery for Query {
let query = AnyQuery::Query(self.request.clone());
self.parent.explain_plan(&query, verbose).await
}
async fn analyze_plan_with_options(&self, options: QueryExecutionOptions) -> Result<String> {
let query = AnyQuery::Query(self.request.clone());
self.parent.analyze_plan(&query, options).await
}
}
/// A request for a nearest-neighbors search into a table
@@ -1089,6 +1103,11 @@ impl ExecutableQuery for VectorQuery {
let query = AnyQuery::VectorQuery(self.request.clone());
self.parent.explain_plan(&query, verbose).await
}
async fn analyze_plan_with_options(&self, options: QueryExecutionOptions) -> Result<String> {
let query = AnyQuery::VectorQuery(self.request.clone());
self.parent.analyze_plan(&query, options).await
}
}
impl HasQuery for VectorQuery {
@@ -1370,6 +1389,31 @@ mod tests {
}
}
#[tokio::test]
async fn test_analyze_plan() {
let tmp_dir = tempdir().unwrap();
let table = make_test_table(&tmp_dir).await;
let result = table.query().analyze_plan().await.unwrap();
assert!(result.contains("metrics="));
}
#[tokio::test]
async fn test_analyze_plan_with_options() {
let tmp_dir = tempdir().unwrap();
let table = make_test_table(&tmp_dir).await;
let result = table
.query()
.analyze_plan_with_options(QueryExecutionOptions {
max_batch_length: 10,
..Default::default()
})
.await
.unwrap();
assert!(result.contains("metrics="));
}
fn assert_plan_exists(plan: &Arc<dyn ExecutionPlan>, name: &str) -> bool {
if plan.name() == name {
return true;


@@ -614,6 +614,48 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
Ok(final_plan)
}
async fn analyze_plan(
&self,
query: &AnyQuery,
_options: QueryExecutionOptions,
) -> Result<String> {
let request = self
.client
.post(&format!("/v1/table/{}/analyze_plan/", self.name));
let query_bodies = self.prepare_query_bodies(query).await?;
let requests: Vec<reqwest::RequestBuilder> = query_bodies
.into_iter()
.map(|body| request.try_clone().unwrap().json(&body))
.collect();
let futures = requests.into_iter().map(|req| async move {
let (request_id, response) = self.client.send(req, true).await?;
let response = self.check_table_response(&request_id, response).await?;
let body = response.text().await.err_to_http(request_id.clone())?;
serde_json::from_str(&body).map_err(|e| Error::Http {
source: format!("Failed to execute analyze plan: {}", e).into(),
request_id,
status_code: None,
})
});
let analyze_result_texts = futures::future::try_join_all(futures).await?;
let final_analyze = if analyze_result_texts.len() > 1 {
analyze_result_texts
.into_iter()
.enumerate()
.map(|(i, plan)| format!("--- Query #{} ---\n{}", i + 1, plan))
.collect::<Vec<_>>()
.join("\n\n")
} else {
analyze_result_texts.into_iter().next().unwrap_or_default()
};
Ok(final_analyze)
}
async fn update(&self, update: UpdateBuilder) -> Result<u64> {
self.check_mutable().await?;
let request = self


@@ -33,7 +33,7 @@ use lance::dataset::{
use lance::dataset::{MergeInsertBuilder as LanceMergeInsertBuilder, WhenNotMatchedBySource};
use lance::index::vector::utils::infer_vector_dim;
use lance::io::WrappingObjectStore;
use lance_datafusion::exec::execute_plan;
use lance_datafusion::exec::{analyze_plan as lance_analyze_plan, execute_plan};
use lance_datafusion::utils::StreamingWriteSource;
use lance_index::vector::hnsw::builder::HnswBuildParams;
use lance_index::vector::ivf::IvfBuildParams;
@@ -433,6 +433,12 @@ pub trait BaseTable: std::fmt::Display + std::fmt::Debug + Send + Sync {
Ok(format!("{}", display.indent(verbose)))
}
async fn analyze_plan(
&self,
query: &AnyQuery,
options: QueryExecutionOptions,
) -> Result<String>;
/// Add new records to the table.
async fn add(
&self,
@@ -2192,6 +2198,15 @@ impl BaseTable for NativeTable {
self.generic_query(query, options).await
}
async fn analyze_plan(
&self,
query: &AnyQuery,
options: QueryExecutionOptions,
) -> Result<String> {
let plan = self.create_plan(query, options).await?;
Ok(lance_analyze_plan(plan, Default::default()).await?)
}
async fn merge_insert(
&self,
params: MergeInsertBuilder,