mirror of
https://github.com/lancedb/lancedb.git
synced 2026-05-14 10:30:40 +00:00
Fixes #2716

## Summary

Add support for querying with Float16Array, Float64Array, and Uint8Array vectors in the Node.js SDK, eliminating precision loss from the previous `Float32Array.from()` conversion.

## Implementation

Follows @wjones127's [5-step plan](https://github.com/lancedb/lancedb/issues/2716#issuecomment-3447750543):

### Rust (`nodejs/src/query.rs`)

1. `bytes_to_arrow_array(data: Uint8Array, dtype: String)` helper that:
   - Creates an Arrow `Buffer` from the raw bytes
   - Wraps it in a typed `ScalarBuffer<T>` based on the dtype enum
   - Constructs a `PrimitiveArray` and returns `Arc<dyn Array>`
2. `nearest_to_raw(data, dtype)` and `add_query_vector_raw(data, dtype)` NAPI methods that pass the type-erased array to the core `nearest_to`/`add_query_vector`, which already accept `impl IntoQueryVector` for `Arc<dyn Array>`

### TypeScript (`nodejs/lancedb/query.ts`, `arrow.ts`)

3. Extended `IntoVector` type to include `Uint8Array` (and `Float16Array` via runtime check for Node 22+)
4. `extractVectorBuffer()` helper detects non-Float32 typed arrays and extracts their underlying byte buffer + dtype string
5. `nearestTo()` and `addQueryVector()` route through the raw NAPI path when the input is Float16/Float64/Uint8

### Backward compatibility

Existing `Float32Array` and `number[]` inputs are unchanged -- they still use the original `nearest_to(Float32Array)` NAPI method. The new raw path is only used when a non-Float32 typed array is detected.
## Usage

```typescript
// Float16Array (Node 22+) -- no precision loss
const f16vec = new Float16Array([0.1, 0.2, 0.3]);
const results = await table.query().nearestTo(f16vec).limit(10).toArray();

// Float64Array -- no precision loss
const f64vec = new Float64Array([0.1, 0.2, 0.3]);
const results = await table.query().nearestTo(f64vec).limit(10).toArray();

// Uint8Array (binary embeddings)
const u8vec = new Uint8Array([1, 0, 1, 1, 0]);
const results = await table.query().nearestTo(u8vec).limit(10).toArray();

// Existing usage unchanged
const results = await table.query().nearestTo([0.1, 0.2, 0.3]).limit(10).toArray();
```

## Note on dependencies

The Rust side uses the `arrow_array`, `arrow_buffer`, and `half` crates. These should already be in the dependency tree via `lancedb` core, but `Cargo.toml` may need explicit entries for `half` and the arrow sub-crates in the nodejs workspace.

---------

Signed-off-by: Vedant Madane <6527493+VedantMadane@users.noreply.github.com>
Co-authored-by: Will Jones <willjones127@gmail.com>
44 lines
1.1 KiB
TOML
44 lines
1.1 KiB
TOML
# Package metadata for the `lancedb-nodejs` crate. Every field marked
# `.workspace = true` is inherited from the workspace root manifest; only the
# name and version are set locally.
[package]
name = "lancedb-nodejs"
version = "0.27.2-beta.1"
categories.workspace = true
edition.workspace = true
keywords.workspace = true
license.workspace = true
repository.workspace = true
description.workspace = true

[lib]
# Build a C-ABI dynamic library rather than a Rust rlib.
# NOTE(review): presumably the artifact loaded by Node.js as a native addon,
# given the napi dependencies in this manifest.
crate-type = ["cdylib"]

# Runtime dependencies, sorted alphabetically. Entries written as
# `<crate>.workspace = true` take their version from the workspace root.
[dependencies]
arrow-array.workspace = true
# Unlike the other arrow crates, this version is set here rather than inherited.
# NOTE(review): keep it in step with the workspace arrow version, or switch to
# `arrow-buffer.workspace = true` if the workspace manifest declares it.
arrow-buffer = "57.2"
arrow-ipc.workspace = true
arrow-schema.workspace = true
async-trait.workspace = true
env_logger.workspace = true
futures.workspace = true
half.workspace = true
# Core crate, referenced by path; its default features are disabled here and
# selected through this crate's [features] table instead.
lancedb = { path = "../rust/lancedb", default-features = false }
log.workspace = true
# Prevent dynamic linking of lzma, which comes from datafusion
lzma-sys = { version = "0.1", features = ["static"] }
napi = { version = "3.8.3", default-features = false, features = [
    "napi9",
    "async",
] }
napi-derive = "3.5.2"

# Pin to resolve build failures; update periodically for security patches.
aws-lc-rs = "=1.16.1"
aws-lc-sys = "=0.38.0"

# Dependencies available to the build script only.
[build-dependencies]
napi-build = "2.3.1"

# Cargo features. Each entry forwards to a feature of the `lancedb` dependency;
# `default` enables `remote` plus the listed `lancedb/*` features.
[features]
default = [
    "remote",
    "lancedb/aws",
    "lancedb/gcs",
    "lancedb/azure",
    "lancedb/dynamodb",
    "lancedb/oss",
    "lancedb/huggingface",
]
fp16kernels = ["lancedb/fp16kernels"]
remote = ["lancedb/remote"]