From e9e904783c5973e856c36b61bcecdf388b6bc130 Mon Sep 17 00:00:00 2001 From: Weston Pace Date: Tue, 27 Jan 2026 19:32:59 -0800 Subject: [PATCH] feat: allow the permutation builder memory limit to be configured by env var (#2946) Running into issues with DF sorting again. This will at least allow the memory limit to be set large to bypass problems. --- rust/lancedb/src/dataloader/permutation/builder.rs | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/rust/lancedb/src/dataloader/permutation/builder.rs b/rust/lancedb/src/dataloader/permutation/builder.rs index 66634ea75..0c841e1b9 100644 --- a/rust/lancedb/src/dataloader/permutation/builder.rs +++ b/rust/lancedb/src/dataloader/permutation/builder.rs @@ -27,6 +27,8 @@ pub const SRC_ROW_ID_COL: &str = "row_id"; pub const SPLIT_NAMES_CONFIG_KEY: &str = "split_names"; +pub const DEFAULT_MEMORY_LIMIT: usize = 100 * 1024 * 1024; + /// Where to store the permutation table #[derive(Debug, Clone, Default)] enum PermutationDestination { @@ -167,10 +169,20 @@ impl PermutationBuilder { &self, data: SendableRecordBatchStream, ) -> Result { + let memory_limit = std::env::var("LANCEDB_PERM_BUILDER_MEMORY_LIMIT") + .unwrap_or_else(|_| DEFAULT_MEMORY_LIMIT.to_string()) + .parse::<usize>() + .unwrap_or_else(|_| { + log::error!( + "Failed to parse LANCEDB_PERM_BUILDER_MEMORY_LIMIT, using default: {}", + DEFAULT_MEMORY_LIMIT + ); + DEFAULT_MEMORY_LIMIT + }); let ctx = SessionContext::new_with_config_rt( SessionConfig::default(), RuntimeEnvBuilder::new() - .with_memory_limit(100 * 1024 * 1024, 1.0) + .with_memory_limit(memory_limit, 1.0) .with_disk_manager_builder( DiskManagerBuilder::default() .with_mode(self.config.temp_dir.to_disk_manager_mode()),