From 82b25a71e956a907ad153bf72ebade05cbb8a2c0 Mon Sep 17 00:00:00 2001 From: Colin Patrick McCabe Date: Fri, 26 Sep 2025 11:24:43 -0700 Subject: [PATCH] feat: add support for test_remote_connections (#2666) Add a new test feature which allows for running the lancedb tests against a remote server. Convert over a few tests in src/connection.rs as a proof of concept. To make local development easier, the remote tests can be run locally from a Makefile. This file can also be used to run the feature tests, with a single invocation of 'make'. (The feature tests require bringing up a docker compose environment.) --- .github/workflows/rust.yml | 10 +-- Cargo.lock | 1 + ci/create_lancedb_test_connection.sh | 4 + ci/run_with_docker_compose.sh | 18 ++++ ci/run_with_test_connection.sh | 51 +++++++++++ rust/lancedb/Cargo.toml | 1 + rust/lancedb/Makefile | 19 ++++ rust/lancedb/src/connection.rs | 18 ++-- rust/lancedb/src/lib.rs | 1 + rust/lancedb/src/test_connection.rs | 126 +++++++++++++++++++++++++++ 10 files changed, 231 insertions(+), 18 deletions(-) create mode 100755 ci/create_lancedb_test_connection.sh create mode 100755 ci/run_with_docker_compose.sh create mode 100755 ci/run_with_test_connection.sh create mode 100644 rust/lancedb/Makefile create mode 100644 rust/lancedb/src/test_connection.rs diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index e483f849..6e3be59c 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -96,6 +96,7 @@ jobs: # Need up-to-date compilers for kernels CC: clang-18 CXX: clang++-18 + GH_TOKEN: ${{ secrets.SOPHON_READ_TOKEN }} steps: - uses: actions/checkout@v4 with: @@ -117,15 +118,14 @@ jobs: sudo chmod 600 /swapfile sudo mkswap /swapfile sudo swapon /swapfile - - name: Start S3 integration test environment - working-directory: . 
-        run: docker compose up --detach --wait
       - name: Build
         run: cargo build --all-features --tests --locked --examples
-      - name: Run tests
-        run: cargo test --all-features --locked
+      - name: Run feature tests
+        run: make -C ./lancedb feature-tests
       - name: Run examples
         run: cargo run --example simple --locked
+      - name: Run remote tests
+        run: make -C ./lancedb remote-tests
 
   macos:
     timeout-minutes: 30
diff --git a/Cargo.lock b/Cargo.lock
index 1d727748..55a60454 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -5639,6 +5639,7 @@ dependencies = [
 name = "lancedb"
 version = "0.22.2-beta.0"
 dependencies = [
+ "anyhow",
  "arrow",
  "arrow-array",
  "arrow-cast",
diff --git a/ci/create_lancedb_test_connection.sh b/ci/create_lancedb_test_connection.sh
new file mode 100755
index 00000000..5dac4f70
--- /dev/null
+++ b/ci/create_lancedb_test_connection.sh
@@ -0,0 +1,4 @@
+#!/usr/bin/env bash
+
+export RUST_LOG=info
+exec ./lancedb server --port 0 --sql-port 0 --data-dir "${1}"
diff --git a/ci/run_with_docker_compose.sh b/ci/run_with_docker_compose.sh
new file mode 100755
index 00000000..8184400a
--- /dev/null
+++ b/ci/run_with_docker_compose.sh
@@ -0,0 +1,18 @@
+#!/usr/bin/env bash
+
+#
+# A script for running the given command together with a docker compose environment.
+#
+
+# Bring down the docker setup once the command is done running.
+tear_down() {
+    docker compose -p fixture down
+}
+trap tear_down EXIT
+
+set +xe
+
+# Clean up any existing docker setup and bring up a new one.
+docker compose -p fixture up --detach --wait || exit 1
+
+"${@}"
diff --git a/ci/run_with_test_connection.sh b/ci/run_with_test_connection.sh
new file mode 100755
index 00000000..8d54c423
--- /dev/null
+++ b/ci/run_with_test_connection.sh
@@ -0,0 +1,58 @@
+#!/usr/bin/env bash
+
+#
+# A script for running the given command together with the lancedb cli.
+#
+# If ./lancedb does not exist, the cli tarball is first downloaded with gh and
+# unpacked into the current directory.
+#
+
+# Print the given message to stderr and exit with status 1.
+die() {
+    echo "$*" >&2
+    exit 1
+}
+
+# Exit with an error unless the named command can be found.
+check_command_exists() {
+    local cmd="${1}"
+    command -v "${cmd}" &> /dev/null || \
+        die "Unable to locate command: ${cmd}. Did you install it?"
+}
+
+if [[ ! -e ./lancedb ]]; then
+    ARCH="x64"
+    # Both branches below need the machine name to detect arm64.
+    UNAME=$(uname -m)
+    if [[ $OSTYPE == 'darwin'* ]]; then
+        if [[ $UNAME == 'arm64' ]]; then
+            ARCH='arm64'
+        fi
+        OSTYPE="macos"
+    elif [[ $OSTYPE == 'linux'* ]]; then
+        if [[ $UNAME == 'aarch64' ]]; then
+            ARCH='arm64'
+        fi
+        OSTYPE="linux"
+    else
+        die "unknown OSTYPE: $OSTYPE"
+    fi
+
+    check_command_exists gh
+    TARGET="lancedb-${OSTYPE}-${ARCH}.tar.gz"
+    gh release \
+        --repo lancedb/sophon \
+        download lancedb-cli-v0.0.3 \
+        --pattern "${TARGET}" \
+        || die "failed to fetch cli."
+
+    check_command_exists tar
+    tar xvf "${TARGET}" || die "tar failed."
+    [[ -e ./lancedb ]] || die "failed to extract lancedb."
+fi
+
+# Tell the tests which script starts a server for a test connection.
+SCRIPT_DIR=$(dirname "$(readlink -f "$0")")
+export CREATE_LANCEDB_TEST_CONNECTION_SCRIPT="${SCRIPT_DIR}/create_lancedb_test_connection.sh"
+
+"${@}"
diff --git a/rust/lancedb/Cargo.toml b/rust/lancedb/Cargo.toml
index 2ef4ee73..e4c8cc68 100644
--- a/rust/lancedb/Cargo.toml
+++ b/rust/lancedb/Cargo.toml
@@ -82,6 +82,7 @@ crunchy.workspace = true
 bytemuck_derive.workspace = true
 
 [dev-dependencies]
+anyhow = "1"
 tempfile = "3.5.0"
 rand = { version = "0.9", features = ["small_rng"] }
 random_word = { version = "0.4.3", features = ["en"] }
diff --git a/rust/lancedb/Makefile b/rust/lancedb/Makefile
new file mode 100644
index 00000000..7c487370
--- /dev/null
+++ b/rust/lancedb/Makefile
@@ -0,0 +1,19 @@
+#
+# Makefile for running tests.
+#
+
+# Run all tests.
+all-tests: feature-tests remote-tests
+
+# Run tests for every feature. This requires using docker compose to set up
+# the environment.
+feature-tests:
+	../../ci/run_with_docker_compose.sh \
+		cargo test --all-features --tests --locked --examples
+.PHONY: feature-tests
+
+# Run tests against remote endpoints.
+remote-tests: + ../../ci/run_with_test_connection.sh \ + cargo test --features remote --locked +.PHONY: remote-tests diff --git a/rust/lancedb/src/connection.rs b/rust/lancedb/src/connection.rs index 08a10444..66e24161 100644 --- a/rust/lancedb/src/connection.rs +++ b/rust/lancedb/src/connection.rs @@ -1170,6 +1170,7 @@ mod tests { use crate::database::listing::{ListingDatabaseOptions, NewTableConfig}; use crate::query::QueryBase; use crate::query::{ExecutableQuery, QueryExecutionOptions}; + use crate::test_connection::test_utils::new_test_connection; use arrow::compute::concat_batches; use arrow_array::RecordBatchReader; use arrow_schema::{DataType, Field, Schema}; @@ -1185,11 +1186,8 @@ mod tests { #[tokio::test] async fn test_connect() { - let tmp_dir = tempdir().unwrap(); - let uri = tmp_dir.path().to_str().unwrap(); - let db = connect(uri).execute().await.unwrap(); - - assert_eq!(db.uri, uri); + let tc = new_test_connection().await.unwrap(); + assert_eq!(tc.connection.uri, tc.uri); } #[cfg(not(windows))] @@ -1255,16 +1253,10 @@ mod tests { assert_eq!(tables, names[..7]); } - #[tokio::test] - async fn test_connect_s3() { - // let db = Database::connect("s3://bucket/path/to/database").await.unwrap(); - } - #[tokio::test] async fn test_open_table() { - let tmp_dir = tempdir().unwrap(); - let uri = tmp_dir.path().to_str().unwrap(); - let db = connect(uri).execute().await.unwrap(); + let tc = new_test_connection().await.unwrap(); + let db = tc.connection; assert_eq!(db.table_names().execute().await.unwrap().len(), 0); // open non-exist table diff --git a/rust/lancedb/src/lib.rs b/rust/lancedb/src/lib.rs index e30ca714..9637cf39 100644 --- a/rust/lancedb/src/lib.rs +++ b/rust/lancedb/src/lib.rs @@ -206,6 +206,7 @@ pub mod query; pub mod remote; pub mod rerankers; pub mod table; +pub mod test_connection; pub mod utils; use std::fmt::Display; diff --git a/rust/lancedb/src/test_connection.rs b/rust/lancedb/src/test_connection.rs new file mode 100644 index 
00000000..2afd41ca
--- /dev/null
+++ b/rust/lancedb/src/test_connection.rs
@@ -0,0 +1,145 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright The LanceDB Authors
+
+//! Functions for testing connections.
+
+#[cfg(test)]
+pub mod test_utils {
+    use regex::Regex;
+    use std::env;
+    use std::io::{BufRead, BufReader};
+    use std::process::{Child, ChildStdout, Command, Stdio};
+
+    use crate::{connect, Connection};
+    use anyhow::{bail, Context, Result};
+    use tempfile::{tempdir, TempDir};
+
+    /// A connection for use in tests, together with the resources backing it.
+    pub struct TestConnection {
+        /// The URI the connection was opened with.
+        pub uri: String,
+        /// The open connection.
+        pub connection: Connection,
+        // Declared before `_temp_dir` so that the server process is stopped
+        // before its data directory is dropped.
+        _process: Option<TestProcess>,
+        _temp_dir: Option<TempDir>,
+    }
+
+    /// Owns a spawned server process and stops it when dropped.
+    struct TestProcess {
+        child: Child,
+    }
+
+    impl Drop for TestProcess {
+        fn drop(&mut self) {
+            // Best effort: kill() fails if the child has already exited, and
+            // there is nothing useful to do with either error in a destructor.
+            let _ = self.child.kill();
+            // Reap the child so that it does not linger as a zombie.
+            let _ = self.child.wait();
+        }
+    }
+
+    /// Creates a connection for a test.
+    ///
+    /// If `CREATE_LANCEDB_TEST_CONNECTION_SCRIPT` is set, the script it names is
+    /// spawned and the connection goes to the server it starts. Otherwise the
+    /// connection is opened directly on a fresh temporary directory.
+    pub async fn new_test_connection() -> Result<TestConnection> {
+        match env::var("CREATE_LANCEDB_TEST_CONNECTION_SCRIPT") {
+            Ok(script_path) => new_remote_connection(&script_path).await,
+            Err(_) => new_local_connection().await,
+        }
+    }
+
+    /// Spawns `script_path` with a temporary data directory as its only
+    /// argument, waits for it to report its port, and connects to it.
+    async fn new_remote_connection(script_path: &str) -> Result<TestConnection> {
+        let temp_dir = tempdir()?;
+        let child = Command::new(script_path)
+            .arg(temp_dir.path())
+            .stdin(Stdio::null())
+            .stdout(Stdio::piped())
+            // Nothing reads the child's stderr, so let it share ours instead
+            // of risking the child blocking on a full pipe.
+            .stderr(Stdio::inherit())
+            .spawn()
+            .with_context(|| format!("Unable to run {}", script_path))?;
+        // From here on, any early return drops `process` and kills the child.
+        let mut process = TestProcess { child };
+        let stdout = process
+            .child
+            .stdout
+            .take()
+            .context("child process stdout was not captured")?;
+        let port = read_process_port(BufReader::new(stdout))?;
+        let uri = "db://test";
+        let host_override = format!("http://localhost:{}", port);
+        let connection = create_new_connection(uri, &host_override).await?;
+        Ok(TestConnection {
+            uri: uri.to_string(),
+            connection,
+            _process: Some(process),
+            _temp_dir: Some(temp_dir),
+        })
+    }
+
+    /// Reads lines from `stdout` until one announces the listening port, and
+    /// returns that port.
+    ///
+    /// Fails on a read error, or if the output ends before the port is seen.
+    fn read_process_port(mut stdout: BufReader<ChildStdout>) -> Result<String> {
+        let re = Regex::new(r"Query node now listening on 0\.0\.0\.0:(\d+)")
+            .expect("port pattern is a valid regex");
+        let mut line = String::new();
+        loop {
+            // read_line() appends, so start each iteration with an empty buffer.
+            line.clear();
+            let bytes_read = stdout
+                .read_line(&mut line)
+                .context("read_process_port: error while reading from process output")?;
+            if bytes_read == 0 {
+                bail!("read_process_port: hit EOF before reading port from process output.");
+            }
+            if let Some(caps) = re.captures(&line) {
+                return Ok(caps[1].to_string());
+            }
+        }
+    }
+
+    /// Connects to the test server at `host_override`.
+    #[cfg(feature = "remote")]
+    async fn create_new_connection(uri: &str, host_override: &str) -> Result<Connection> {
+        let connection = connect(uri)
+            .region("us-east-1")
+            .api_key("sk_localtest")
+            .host_override(host_override)
+            .execute()
+            .await?;
+        Ok(connection)
+    }
+
+    /// Without the `remote` feature there is no way to reach a server.
+    #[cfg(not(feature = "remote"))]
+    async fn create_new_connection(_uri: &str, _host_override: &str) -> Result<Connection> {
+        bail!("remote feature not supported");
+    }
+
+    /// Opens a connection on a fresh temporary directory.
+    async fn new_local_connection() -> Result<TestConnection> {
+        let temp_dir = tempdir()?;
+        let uri = temp_dir
+            .path()
+            .to_str()
+            .context("temporary directory path is not valid UTF-8")?
+            .to_string();
+        let connection = connect(uri.as_str()).execute().await?;
+        Ok(TestConnection {
+            uri,
+            connection,
+            _process: None,
+            _temp_dir: Some(temp_dir),
+        })
+    }
+}