From a324f4ad7aaded8893e73c29f7f0fd94c1404d8f Mon Sep 17 00:00:00 2001 From: Will Jones Date: Tue, 29 Oct 2024 15:13:34 -0700 Subject: [PATCH] feat(node): enable logging and show full errors (#1775) This exposes the `LANCEDB_LOG` environment variable in node, so that users can now turn on logging. In addition, fixes a bug where only the top-level error from Rust was being shown. This PR makes sure the full error chain is included in the error message. In the future, will improve this so the error chain is set on the [cause](https://nodejs.org/api/errors.html#errorcause) property of JS errors https://github.com/lancedb/lancedb/issues/1779 Fixes #1774 --- Cargo.toml | 1 + docs/mkdocs.yml | 3 +- docs/src/troubleshooting.md | 33 ++++++++++++++++++++++ nodejs/Cargo.toml | 2 ++ nodejs/__test__/remote.test.ts | 25 ++++++++++++++++ nodejs/src/connection.rs | 38 ++++++------------------- nodejs/src/error.rs | 27 +++++++++++++++++- nodejs/src/lib.rs | 9 ++++++ nodejs/src/merge.rs | 16 +++++++---- nodejs/src/query.rs | 21 +++++++++++--- nodejs/src/table.rs | 47 +++++-------------------------- python/Cargo.toml | 2 +- rust/lancedb/src/remote/client.rs | 13 ++++++++- 13 files changed, 155 insertions(+), 82 deletions(-) create mode 100644 docs/src/troubleshooting.md diff --git a/Cargo.toml b/Cargo.toml index 9d5725f1..263fab98 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -41,6 +41,7 @@ async-trait = "0" chrono = "0.4.35" datafusion-common = "41.0" datafusion-physical-plan = "41.0" +env_logger = "0.10" half = { "version" = "=2.4.1", default-features = false, features = [ "num-traits", ] } diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml index 0f64a0a3..0bb1ebbe 100644 --- a/docs/mkdocs.yml +++ b/docs/mkdocs.yml @@ -211,9 +211,10 @@ nav: - TransformersJS Embedding Search: examples/transformerjs_embedding_search_nodejs.md - πŸ¦€ Rust: - Overview: examples/examples_rust.md - - Studies: + - πŸ““ Studies: - β†—Improve retrievers with hybrid search and reranking: https://blog.lancedb.com/hybrid-search-and-reranking-report/ - πŸ’­ FAQs: faq.md + - πŸ” Troubleshooting: troubleshooting.md - βš™οΈ API reference: - 🐍 Python: python/python.md - πŸ‘Ύ JavaScript (vectordb): javascript/modules.md diff --git a/docs/src/troubleshooting.md b/docs/src/troubleshooting.md new file mode 100644 index 00000000..595753d6 --- /dev/null +++ b/docs/src/troubleshooting.md @@ -0,0 +1,33 @@ +## Getting help + +The following sections provide various diagnostics and troubleshooting tips for LanceDB. +These can help you provide additional information when asking questions or making +error reports. + +For trouble shooting, the best place to ask is in our Discord, under the relevant +language channel. By asking in the language-specific channel, it makes it more +likely that someone who knows the answer will see your question. + +## Enabling logging + +To provide more information, especially for LanceDB Cloud related issues, enable +debug logging. You can set the `LANCEDB_LOG` environment variable: + +```shell +export LANCEDB_LOG=debug +``` + +You can turn off colors and formatting in the logs by setting + +```shell +export LANCEDB_LOG_STYLE=never +``` + +## Explaining query plans + +If you have slow queries or unexpected query results, it can be helpful to +print the resolved query plan. You can use the `explain_plan` method to do this: + +* Python Sync: [LanceQueryBuilder.explain_plan][lancedb.query.LanceQueryBuilder.explain_plan] +* Python Async: [AsyncQueryBase.explain_plan][lancedb.query.AsyncQueryBase.explain_plan] +* Node @lancedb/lancedb: [LanceQueryBuilder.explainPlan](/lancedb/js/classes/QueryBase/#explainplan) diff --git a/nodejs/Cargo.toml b/nodejs/Cargo.toml index f8c0dbb0..458eb4ef 100644 --- a/nodejs/Cargo.toml +++ b/nodejs/Cargo.toml @@ -13,6 +13,7 @@ crate-type = ["cdylib"] [dependencies] arrow-ipc.workspace = true +env_logger.workspace = true futures.workspace = true lancedb = { path = "../rust/lancedb", features = ["remote"] } napi = { version = "2.16.8", default-features = false, features = [ @@ -22,6 +23,7 @@ napi = { version = "2.16.8", default-features = false, features = [ napi-derive = "2.16.4" # Prevent dynamic linking of lzma, which comes from datafusion lzma-sys = { version = "*", features = ["static"] } +log.workspace = true [build-dependencies] napi-build = "2.1" diff --git a/nodejs/__test__/remote.test.ts b/nodejs/__test__/remote.test.ts index 3e693197..6dbac639 100644 --- a/nodejs/__test__/remote.test.ts +++ b/nodejs/__test__/remote.test.ts @@ -90,4 +90,29 @@ describe("remote connection", () => { }, ); }); + + it("shows the full error messages on retry errors", async () => { + await withMockDatabase( + (_req, res) => { + // We retry on 500 errors, so we return 500s until the client gives up. + res.writeHead(500).end("Internal Server Error"); + }, + async (db) => { + try { + await db.tableNames(); + fail("expected an error"); + // biome-ignore lint/suspicious/noExplicitAny: skip + } catch (e: any) { + expect(e.message).toContain("Hit retry limit for request_id="); + expect(e.message).toContain("Caused by: Http error"); + expect(e.message).toContain("500 Internal Server Error"); + } + }, + { + clientConfig: { + retryConfig: { retries: 2 }, + }, + }, + ); + }); }); diff --git a/nodejs/src/connection.rs b/nodejs/src/connection.rs index 9f2a7305..01c41239 100644 --- a/nodejs/src/connection.rs +++ b/nodejs/src/connection.rs @@ -18,6 +18,7 @@ use std::str::FromStr; use napi::bindgen_prelude::*; use napi_derive::*; +use crate::error::{convert_error, NapiErrorExt}; use crate::table::Table; use crate::ConnectionOptions; use lancedb::connection::{ @@ -86,12 +87,7 @@ impl Connection { builder = builder.host_override(&host_override); } - Ok(Self::inner_new( - builder - .execute() - .await - .map_err(|e| napi::Error::from_reason(format!("{}", e)))?, - )) + Ok(Self::inner_new(builder.execute().await.default_error()?)) } #[napi] @@ -123,9 +119,7 @@ impl Connection { if let Some(limit) = limit { op = op.limit(limit); } - op.execute() - .await - .map_err(|e| napi::Error::from_reason(format!("{}", e))) + op.execute().await.default_error() } /// Create table from a Apache Arrow IPC (file) buffer. @@ -156,17 +150,13 @@ impl Connection { } if let Some(data_storage_option) = data_storage_options.as_ref() { builder = builder.data_storage_version( - LanceFileVersion::from_str(data_storage_option) - .map_err(|e| napi::Error::from_reason(format!("{}", e)))?, + LanceFileVersion::from_str(data_storage_option).map_err(|e| convert_error(&e))?, ); } if let Some(enable_v2_manifest_paths) = enable_v2_manifest_paths { builder = builder.enable_v2_manifest_paths(enable_v2_manifest_paths); } - let tbl = builder - .execute() - .await - .map_err(|e| napi::Error::from_reason(format!("{}", e)))?; + let tbl = builder.execute().await.default_error()?; Ok(Table::new(tbl)) } @@ -195,17 +185,13 @@ impl Connection { } if let Some(data_storage_option) = data_storage_options.as_ref() { builder = builder.data_storage_version( - LanceFileVersion::from_str(data_storage_option) - .map_err(|e| napi::Error::from_reason(format!("{}", e)))?, + LanceFileVersion::from_str(data_storage_option).map_err(|e| convert_error(&e))?, ); } if let Some(enable_v2_manifest_paths) = enable_v2_manifest_paths { builder = builder.enable_v2_manifest_paths(enable_v2_manifest_paths); } - let tbl = builder - .execute() - .await - .map_err(|e| napi::Error::from_reason(format!("{}", e)))?; + let tbl = builder.execute().await.default_error()?; Ok(Table::new(tbl)) } @@ -225,19 +211,13 @@ impl Connection { if let Some(index_cache_size) = index_cache_size { builder = builder.index_cache_size(index_cache_size); } - let tbl = builder - .execute() - .await - .map_err(|e| napi::Error::from_reason(format!("{}", e)))?; + let tbl = builder.execute().await.default_error()?; Ok(Table::new(tbl)) } /// Drop table with the name. Or raise an error if the table does not exist. #[napi(catch_unwind)] pub async fn drop_table(&self, name: String) -> napi::Result<()> { - self.get_inner()? - .drop_table(&name) - .await - .map_err(|e| napi::Error::from_reason(format!("{}", e))) + self.get_inner()?.drop_table(&name).await.default_error() } } diff --git a/nodejs/src/error.rs b/nodejs/src/error.rs index ddbb4471..7fec4725 100644 --- a/nodejs/src/error.rs +++ b/nodejs/src/error.rs @@ -7,6 +7,31 @@ pub trait NapiErrorExt { impl NapiErrorExt for std::result::Result { fn default_error(self) -> Result { - self.map_err(|err| napi::Error::from_reason(err.to_string())) + self.map_err(|err| convert_error(&err)) } } + +pub fn convert_error(err: &dyn std::error::Error) -> napi::Error { + let mut message = err.to_string(); + + // Append causes + let mut cause = err.source(); + let mut indent = 2; + while let Some(err) = cause { + let cause_message = format!("Caused by: {}", err); + message.push_str(&indent_string(&cause_message, indent)); + + cause = err.source(); + indent += 2; + } + + napi::Error::from_reason(message) +} + +fn indent_string(s: &str, amount: usize) -> String { + let indent = " ".repeat(amount); + s.lines() + .map(|line| format!("{}{}", indent, line)) + .collect::>() + .join("\n") +} diff --git a/nodejs/src/lib.rs b/nodejs/src/lib.rs index 54fde9bc..a18bc75d 100644 --- a/nodejs/src/lib.rs +++ b/nodejs/src/lib.rs @@ -14,6 +14,7 @@ use std::collections::HashMap; +use env_logger::Env; use napi_derive::*; mod connection; @@ -77,3 +78,11 @@ pub struct WriteOptions { pub struct OpenTableOptions { pub storage_options: Option>, } + +#[napi::module_init] +fn init() { + let env = Env::new() + .filter_or("LANCEDB_LOG", "trace") + .write_style("LANCEDB_LOG_STYLE"); + env_logger::init_from_env(env); +} diff --git a/nodejs/src/merge.rs b/nodejs/src/merge.rs index f13091fc..9228ec44 100644 --- a/nodejs/src/merge.rs +++ b/nodejs/src/merge.rs @@ -2,6 +2,8 @@ use lancedb::{arrow::IntoArrow, ipc::ipc_file_to_batches, table::merge::MergeIns use napi::bindgen_prelude::*; use napi_derive::napi; +use crate::error::convert_error; + #[napi] #[derive(Clone)] /// A builder used to create and run a merge insert operation @@ -35,14 +37,18 @@ impl NativeMergeInsertBuilder { pub async fn execute(&self, buf: Buffer) -> napi::Result<()> { let data = ipc_file_to_batches(buf.to_vec()) .and_then(IntoArrow::into_arrow) - .map_err(|e| napi::Error::from_reason(format!("Failed to read IPC file: {}", e)))?; + .map_err(|e| { + napi::Error::from_reason(format!("Failed to read IPC file: {}", convert_error(&e))) + })?; let this = self.clone(); - this.inner - .execute(data) - .await - .map_err(|e| napi::Error::from_reason(format!("Failed to execute merge insert: {}", e))) + this.inner.execute(data).await.map_err(|e| { + napi::Error::from_reason(format!( + "Failed to execute merge insert: {}", + convert_error(&e) + )) + }) } } diff --git a/nodejs/src/query.rs b/nodejs/src/query.rs index d3fe7283..d0132699 100644 --- a/nodejs/src/query.rs +++ b/nodejs/src/query.rs @@ -22,6 +22,7 @@ use lancedb::query::VectorQuery as LanceDbVectorQuery; use napi::bindgen_prelude::*; use napi_derive::napi; +use crate::error::convert_error; use crate::error::NapiErrorExt; use crate::iterator::RecordBatchIterator; use crate::util::parse_distance_type; @@ -93,7 +94,10 @@ impl Query { .execute_with_options(execution_opts) .await .map_err(|e| { - napi::Error::from_reason(format!("Failed to execute query stream: {}", e)) + napi::Error::from_reason(format!( + "Failed to execute query stream: {}", + convert_error(&e) + )) })?; Ok(RecordBatchIterator::new(inner_stream)) } @@ -101,7 +105,10 @@ impl Query { #[napi] pub async fn explain_plan(&self, verbose: bool) -> napi::Result { self.inner.explain_plan(verbose).await.map_err(|e| { - napi::Error::from_reason(format!("Failed to retrieve the query plan: {}", e)) + napi::Error::from_reason(format!( + "Failed to retrieve the query plan: {}", + convert_error(&e) + )) }) } } @@ -190,7 +197,10 @@ impl VectorQuery { .execute_with_options(execution_opts) .await .map_err(|e| { - napi::Error::from_reason(format!("Failed to execute query stream: {}", e)) + napi::Error::from_reason(format!( + "Failed to execute query stream: {}", + convert_error(&e) + )) })?; Ok(RecordBatchIterator::new(inner_stream)) } @@ -198,7 +208,10 @@ impl VectorQuery { #[napi] pub async fn explain_plan(&self, verbose: bool) -> napi::Result { self.inner.explain_plan(verbose).await.map_err(|e| { - napi::Error::from_reason(format!("Failed to retrieve the query plan: {}", e)) + napi::Error::from_reason(format!( + "Failed to retrieve the query plan: {}", + convert_error(&e) + )) }) } } diff --git a/nodejs/src/table.rs b/nodejs/src/table.rs index f9b14e7f..b2da97de 100644 --- a/nodejs/src/table.rs +++ b/nodejs/src/table.rs @@ -72,10 +72,7 @@ impl Table { /// Return Schema as empty Arrow IPC file. #[napi(catch_unwind)] pub async fn schema(&self) -> napi::Result { - let schema = - self.inner_ref()?.schema().await.map_err(|e| { - napi::Error::from_reason(format!("Failed to create IPC file: {}", e)) - })?; + let schema = self.inner_ref()?.schema().await.default_error()?; let mut writer = FileWriter::try_new(vec![], &schema) .map_err(|e| napi::Error::from_reason(format!("Failed to create IPC file: {}", e)))?; writer @@ -100,12 +97,7 @@ impl Table { return Err(napi::Error::from_reason(format!("Invalid mode: {}", mode))); }; - op.execute().await.map_err(|e| { - napi::Error::from_reason(format!( - "Failed to add batches to table {}: {}", - self.name, e - )) - }) + op.execute().await.default_error() } #[napi(catch_unwind)] @@ -114,22 +106,12 @@ impl Table { .count_rows(filter) .await .map(|val| val as i64) - .map_err(|e| { - napi::Error::from_reason(format!( - "Failed to count rows in table {}: {}", - self.name, e - )) - }) + .default_error() } #[napi(catch_unwind)] pub async fn delete(&self, predicate: String) -> napi::Result<()> { - self.inner_ref()?.delete(&predicate).await.map_err(|e| { - napi::Error::from_reason(format!( - "Failed to delete rows in table {}: predicate={}", - self.name, e - )) - }) + self.inner_ref()?.delete(&predicate).await.default_error() } #[napi(catch_unwind)] @@ -187,12 +169,7 @@ impl Table { self.inner_ref()? .add_columns(transforms, None) .await - .map_err(|err| { - napi::Error::from_reason(format!( - "Failed to add columns to table {}: {}", - self.name, err - )) - })?; + .default_error()?; Ok(()) } @@ -213,12 +190,7 @@ impl Table { self.inner_ref()? .alter_columns(&alterations) .await - .map_err(|err| { - napi::Error::from_reason(format!( - "Failed to alter columns in table {}: {}", - self.name, err - )) - })?; + .default_error()?; Ok(()) } @@ -228,12 +200,7 @@ impl Table { self.inner_ref()? .drop_columns(&col_refs) .await - .map_err(|err| { - napi::Error::from_reason(format!( - "Failed to drop columns from table {}: {}", - self.name, err - )) - })?; + .default_error()?; Ok(()) } diff --git a/python/Cargo.toml b/python/Cargo.toml index 2d42f80f..372dc85a 100644 --- a/python/Cargo.toml +++ b/python/Cargo.toml @@ -16,7 +16,7 @@ crate-type = ["cdylib"] [dependencies] arrow = { version = "52.1", features = ["pyarrow"] } lancedb = { path = "../rust/lancedb" } -env_logger = "0.10" +env_logger.workspace = true pyo3 = { version = "0.21", features = ["extension-module", "abi3-py38", "gil-refs"] } # Using this fork for now: https://github.com/awestlake87/pyo3-asyncio/issues/119 # pyo3-asyncio = { version = "0.20", features = ["attributes", "tokio-runtime"] } diff --git a/rust/lancedb/src/remote/client.rs b/rust/lancedb/src/remote/client.rs index 83d5a14f..50789108 100644 --- a/rust/lancedb/src/remote/client.rs +++ b/rust/lancedb/src/remote/client.rs @@ -266,6 +266,7 @@ impl RestfulLanceDbClient { Some(host_override) => host_override, None => format!("https://{}.{}.api.lancedb.com", db_name, region), }; + debug!("Created client for host: {}", host); let retry_config = client_config.retry_config.try_into()?; Ok(Self { client, @@ -340,6 +341,8 @@ impl RestfulLanceDbClient { request_id }; + debug!("Sending request_id={}: {:?}", request_id, &request); + if with_retry { self.send_with_retry_impl(client, request, request_id).await } else { @@ -348,6 +351,10 @@ impl RestfulLanceDbClient { .send(&client, request) .await .err_to_http(request_id.clone())?; + debug!( + "Received response for request_id={}: {:?}", + request_id, &response + ); Ok((request_id, response)) } } @@ -374,7 +381,11 @@ impl RestfulLanceDbClient { .map(|r| (r.status(), r)); match response { Ok((status, response)) if status.is_success() => { - return Ok((retry_counter.request_id, response)) + debug!( + "Received response for request_id={}: {:?}", + retry_counter.request_id, &response + ); + return Ok((retry_counter.request_id, response)); } Ok((status, response)) if self.retry_config.statuses.contains(&status) => { let source = self