Compare commits

..

2 Commits

Author SHA1 Message Date
Xuanwo
a8f57e8043 Format code
Signed-off-by: Xuanwo <github@xuanwo.io>
2025-09-09 01:08:02 +08:00
Xuanwo
c36409d3b8 feat: Expose disableScoringAutoprojection to lancedb
Signed-off-by: Xuanwo <github@xuanwo.io>
2025-09-09 01:06:29 +08:00
34 changed files with 676 additions and 1155 deletions

View File

@@ -1,5 +1,5 @@
[tool.bumpversion] [tool.bumpversion]
current_version = "0.22.0-beta.0" current_version = "0.21.4-beta.1"
parse = """(?x) parse = """(?x)
(?P<major>0|[1-9]\\d*)\\. (?P<major>0|[1-9]\\d*)\\.
(?P<minor>0|[1-9]\\d*)\\. (?P<minor>0|[1-9]\\d*)\\.

1611
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -15,14 +15,14 @@ categories = ["database-implementations"]
rust-version = "1.78.0" rust-version = "1.78.0"
[workspace.dependencies] [workspace.dependencies]
lance = { "version" = "=0.34.0", default-features = false, "features" = ["dynamodb"], "tag" = "v0.34.0-beta.4", "git" = "https://github.com/lancedb/lance.git" } lance = { "version" = "=0.33.0", default-features = false, "features" = ["dynamodb"], "tag" = "v0.33.0-beta.4", "git" = "https://github.com/lancedb/lance.git" }
lance-io = { "version" = "=0.34.0", default-features = false, "tag" = "v0.34.0-beta.4", "git" = "https://github.com/lancedb/lance.git" } lance-io = { "version" = "=0.33.0", default-features = false, "tag" = "v0.33.0-beta.4", "git" = "https://github.com/lancedb/lance.git" }
lance-index = { "version" = "=0.34.0", "tag" = "v0.34.0-beta.4", "git" = "https://github.com/lancedb/lance.git" } lance-index = { "version" = "=0.33.0", "tag" = "v0.33.0-beta.4", "git" = "https://github.com/lancedb/lance.git" }
lance-linalg = { "version" = "=0.34.0", "tag" = "v0.34.0-beta.4", "git" = "https://github.com/lancedb/lance.git" } lance-linalg = { "version" = "=0.33.0", "tag" = "v0.33.0-beta.4", "git" = "https://github.com/lancedb/lance.git" }
lance-table = { "version" = "=0.34.0", "tag" = "v0.34.0-beta.4", "git" = "https://github.com/lancedb/lance.git" } lance-table = { "version" = "=0.33.0", "tag" = "v0.33.0-beta.4", "git" = "https://github.com/lancedb/lance.git" }
lance-testing = { "version" = "=0.34.0", "tag" = "v0.34.0-beta.4", "git" = "https://github.com/lancedb/lance.git" } lance-testing = { "version" = "=0.33.0", "tag" = "v0.33.0-beta.4", "git" = "https://github.com/lancedb/lance.git" }
lance-datafusion = { "version" = "=0.34.0", "tag" = "v0.34.0-beta.4", "git" = "https://github.com/lancedb/lance.git" } lance-datafusion = { "version" = "=0.33.0", "tag" = "v0.33.0-beta.4", "git" = "https://github.com/lancedb/lance.git" }
lance-encoding = { "version" = "=0.34.0", "tag" = "v0.34.0-beta.4", "git" = "https://github.com/lancedb/lance.git" } lance-encoding = { "version" = "=0.33.0", "tag" = "v0.33.0-beta.4", "git" = "https://github.com/lancedb/lance.git" }
# Note that this one does not include pyarrow # Note that this one does not include pyarrow
arrow = { version = "55.1", optional = false } arrow = { version = "55.1", optional = false }
arrow-array = "55.1" arrow-array = "55.1"

View File

@@ -8,7 +8,7 @@
<parent> <parent>
<groupId>com.lancedb</groupId> <groupId>com.lancedb</groupId>
<artifactId>lancedb-parent</artifactId> <artifactId>lancedb-parent</artifactId>
<version>0.22.0-beta.0</version> <version>0.21.4-beta.1</version>
<relativePath>../pom.xml</relativePath> <relativePath>../pom.xml</relativePath>
</parent> </parent>

View File

@@ -8,7 +8,7 @@
<parent> <parent>
<groupId>com.lancedb</groupId> <groupId>com.lancedb</groupId>
<artifactId>lancedb-parent</artifactId> <artifactId>lancedb-parent</artifactId>
<version>0.22.0-beta.0</version> <version>0.21.4-beta.1</version>
<relativePath>../pom.xml</relativePath> <relativePath>../pom.xml</relativePath>
</parent> </parent>

View File

@@ -6,7 +6,7 @@
<groupId>com.lancedb</groupId> <groupId>com.lancedb</groupId>
<artifactId>lancedb-parent</artifactId> <artifactId>lancedb-parent</artifactId>
<version>0.22.0-beta.0</version> <version>0.21.4-beta.1</version>
<packaging>pom</packaging> <packaging>pom</packaging>
<name>${project.artifactId}</name> <name>${project.artifactId}</name>
<description>LanceDB Java SDK Parent POM</description> <description>LanceDB Java SDK Parent POM</description>

View File

@@ -1,7 +1,7 @@
[package] [package]
name = "lancedb-nodejs" name = "lancedb-nodejs"
edition.workspace = true edition.workspace = true
version = "0.22.0-beta.0" version = "0.21.4-beta.1"
license.workspace = true license.workspace = true
description.workspace = true description.workspace = true
repository.workspace = true repository.workspace = true

View File

@@ -363,6 +363,23 @@ export class StandardQueryBase<
return this.where(predicate); return this.where(predicate);
} }
/**
 * Opt out of automatic projection of scoring columns.
 *
 * If {@link select} is given an explicit projection that leaves out the
 * scoring columns (e.g. `_score` for FTS or `_distance` for vector search),
 * Lance currently adds those columns back automatically and emits a
 * deprecation warning. After calling this method the scoring columns are
 * only returned when they are explicitly selected.
 *
 * @returns this query, so calls can be chained
 */
disableScoringAutoprojection(): this {
  const disableOnNative = (inner: NativeQueryType) => {
    // @ts-expect-error method is present on Query and VectorQuery only
    inner.disableScoringAutoprojection();
  };
  this.doCall(disableOnNative);
  return this;
}
fullTextSearch( fullTextSearch(
query: string | FullTextQuery, query: string | FullTextQuery,
options?: Partial<FullTextSearchOptions>, options?: Partial<FullTextSearchOptions>,

View File

@@ -1,6 +1,6 @@
{ {
"name": "@lancedb/lancedb-darwin-arm64", "name": "@lancedb/lancedb-darwin-arm64",
"version": "0.22.0-beta.0", "version": "0.21.4-beta.1",
"os": ["darwin"], "os": ["darwin"],
"cpu": ["arm64"], "cpu": ["arm64"],
"main": "lancedb.darwin-arm64.node", "main": "lancedb.darwin-arm64.node",

View File

@@ -1,6 +1,6 @@
{ {
"name": "@lancedb/lancedb-darwin-x64", "name": "@lancedb/lancedb-darwin-x64",
"version": "0.22.0-beta.0", "version": "0.21.4-beta.1",
"os": ["darwin"], "os": ["darwin"],
"cpu": ["x64"], "cpu": ["x64"],
"main": "lancedb.darwin-x64.node", "main": "lancedb.darwin-x64.node",

View File

@@ -1,6 +1,6 @@
{ {
"name": "@lancedb/lancedb-linux-arm64-gnu", "name": "@lancedb/lancedb-linux-arm64-gnu",
"version": "0.22.0-beta.0", "version": "0.21.4-beta.1",
"os": ["linux"], "os": ["linux"],
"cpu": ["arm64"], "cpu": ["arm64"],
"main": "lancedb.linux-arm64-gnu.node", "main": "lancedb.linux-arm64-gnu.node",

View File

@@ -1,6 +1,6 @@
{ {
"name": "@lancedb/lancedb-linux-arm64-musl", "name": "@lancedb/lancedb-linux-arm64-musl",
"version": "0.22.0-beta.0", "version": "0.21.4-beta.1",
"os": ["linux"], "os": ["linux"],
"cpu": ["arm64"], "cpu": ["arm64"],
"main": "lancedb.linux-arm64-musl.node", "main": "lancedb.linux-arm64-musl.node",

View File

@@ -1,6 +1,6 @@
{ {
"name": "@lancedb/lancedb-linux-x64-gnu", "name": "@lancedb/lancedb-linux-x64-gnu",
"version": "0.22.0-beta.0", "version": "0.21.4-beta.1",
"os": ["linux"], "os": ["linux"],
"cpu": ["x64"], "cpu": ["x64"],
"main": "lancedb.linux-x64-gnu.node", "main": "lancedb.linux-x64-gnu.node",

View File

@@ -1,6 +1,6 @@
{ {
"name": "@lancedb/lancedb-linux-x64-musl", "name": "@lancedb/lancedb-linux-x64-musl",
"version": "0.22.0-beta.0", "version": "0.21.4-beta.1",
"os": ["linux"], "os": ["linux"],
"cpu": ["x64"], "cpu": ["x64"],
"main": "lancedb.linux-x64-musl.node", "main": "lancedb.linux-x64-musl.node",

View File

@@ -1,6 +1,6 @@
{ {
"name": "@lancedb/lancedb-win32-arm64-msvc", "name": "@lancedb/lancedb-win32-arm64-msvc",
"version": "0.22.0-beta.0", "version": "0.21.4-beta.1",
"os": [ "os": [
"win32" "win32"
], ],

View File

@@ -1,6 +1,6 @@
{ {
"name": "@lancedb/lancedb-win32-x64-msvc", "name": "@lancedb/lancedb-win32-x64-msvc",
"version": "0.22.0-beta.0", "version": "0.21.4-beta.1",
"os": ["win32"], "os": ["win32"],
"cpu": ["x64"], "cpu": ["x64"],
"main": "lancedb.win32-x64-msvc.node", "main": "lancedb.win32-x64-msvc.node",

View File

@@ -1,12 +1,12 @@
{ {
"name": "@lancedb/lancedb", "name": "@lancedb/lancedb",
"version": "0.22.0-beta.0", "version": "0.21.4-beta.1",
"lockfileVersion": 3, "lockfileVersion": 3,
"requires": true, "requires": true,
"packages": { "packages": {
"": { "": {
"name": "@lancedb/lancedb", "name": "@lancedb/lancedb",
"version": "0.22.0-beta.0", "version": "0.21.4-beta.1",
"cpu": [ "cpu": [
"x64", "x64",
"arm64" "arm64"

View File

@@ -11,7 +11,7 @@
"ann" "ann"
], ],
"private": false, "private": false,
"version": "0.22.0-beta.0", "version": "0.21.4-beta.1",
"main": "dist/index.js", "main": "dist/index.js",
"exports": { "exports": {
".": "./dist/index.js", ".": "./dist/index.js",

View File

@@ -88,6 +88,11 @@ impl Query {
self.inner = self.inner.clone().with_row_id(); self.inner = self.inner.clone().with_row_id();
} }
/// Disable autoprojection of scoring columns on the wrapped query.
///
/// Delegates to the inner query's `disable_scoring_autoprojection` on a
/// clone of `self.inner` and stores the returned query back into
/// `self.inner`.
#[napi]
pub fn disable_scoring_autoprojection(&mut self) {
    let without_autoprojection = self.inner.clone().disable_scoring_autoprojection();
    self.inner = without_autoprojection;
}
#[napi(catch_unwind)] #[napi(catch_unwind)]
pub async fn execute( pub async fn execute(
&self, &self,
@@ -265,6 +270,11 @@ impl VectorQuery {
self.inner = self.inner.clone().with_row_id(); self.inner = self.inner.clone().with_row_id();
} }
/// Disable autoprojection of scoring columns on the wrapped vector query.
///
/// Delegates to the inner query's `disable_scoring_autoprojection` on a
/// clone of `self.inner` and stores the returned query back into
/// `self.inner`.
#[napi]
pub fn disable_scoring_autoprojection(&mut self) {
    let updated_query = self.inner.clone().disable_scoring_autoprojection();
    self.inner = updated_query;
}
#[napi] #[napi]
pub fn rerank(&mut self, callbacks: RerankerCallbacks) { pub fn rerank(&mut self, callbacks: RerankerCallbacks) {
self.inner = self self.inner = self
@@ -480,7 +490,6 @@ impl JsFullTextQuery {
} }
#[napi(factory)] #[napi(factory)]
#[allow(clippy::use_self)] // NAPI doesn't allow Self here but clippy reports it
pub fn boolean_query(queries: Vec<(String, &JsFullTextQuery)>) -> napi::Result<Self> { pub fn boolean_query(queries: Vec<(String, &JsFullTextQuery)>) -> napi::Result<Self> {
let mut sub_queries = Vec::with_capacity(queries.len()); let mut sub_queries = Vec::with_capacity(queries.len());
for (occur, q) in queries { for (occur, q) in queries {

View File

@@ -94,7 +94,7 @@ impl napi::bindgen_prelude::FromNapiValue for Session {
env: napi::sys::napi_env, env: napi::sys::napi_env,
napi_val: napi::sys::napi_value, napi_val: napi::sys::napi_value,
) -> napi::Result<Self> { ) -> napi::Result<Self> {
let object: napi::bindgen_prelude::ClassInstance<Self> = let object: napi::bindgen_prelude::ClassInstance<Session> =
napi::bindgen_prelude::ClassInstance::from_napi_value(env, napi_val)?; napi::bindgen_prelude::ClassInstance::from_napi_value(env, napi_val)?;
let copy = object.clone(); let copy = object.clone();
Ok(copy) Ok(copy)

View File

@@ -1,5 +1,5 @@
[tool.bumpversion] [tool.bumpversion]
current_version = "0.25.0-beta.1" current_version = "0.24.4-beta.1"
parse = """(?x) parse = """(?x)
(?P<major>0|[1-9]\\d*)\\. (?P<major>0|[1-9]\\d*)\\.
(?P<minor>0|[1-9]\\d*)\\. (?P<minor>0|[1-9]\\d*)\\.

View File

@@ -1,6 +1,6 @@
[package] [package]
name = "lancedb-python" name = "lancedb-python"
version = "0.25.0-beta.1" version = "0.24.4-beta.1"
edition.workspace = true edition.workspace = true
description = "Python bindings for LanceDB" description = "Python bindings for LanceDB"
license.workspace = true license.workspace = true

View File

@@ -95,10 +95,9 @@ class DBConnection(EnforceOverrides):
@abstractmethod @abstractmethod
def table_names( def table_names(
self, self,
namespace: List[str] = [],
page_token: Optional[str] = None, page_token: Optional[str] = None,
limit: int = 10, limit: int = 10,
*,
namespace: List[str] = [],
) -> Iterable[str]: ) -> Iterable[str]:
"""List all tables in this database, in sorted order """List all tables in this database, in sorted order
@@ -544,10 +543,9 @@ class LanceDBConnection(DBConnection):
@override @override
def table_names( def table_names(
self, self,
namespace: List[str] = [],
page_token: Optional[str] = None, page_token: Optional[str] = None,
limit: int = 10, limit: int = 10,
*,
namespace: List[str] = [],
) -> Iterable[str]: ) -> Iterable[str]:
"""Get the names of all tables in the database. The names are sorted. """Get the names of all tables in the database. The names are sorted.

View File

@@ -138,10 +138,9 @@ class LanceNamespaceDBConnection(DBConnection):
@override @override
def table_names( def table_names(
self, self,
namespace: List[str] = [],
page_token: Optional[str] = None, page_token: Optional[str] = None,
limit: int = 10, limit: int = 10,
*,
namespace: List[str] = [],
) -> Iterable[str]: ) -> Iterable[str]:
request = ListTablesRequest(id=namespace, page_token=page_token, limit=limit) request = ListTablesRequest(id=namespace, page_token=page_token, limit=limit)
response = self._ns.list_tables(request) response = self._ns.list_tables(request)
@@ -191,7 +190,7 @@ class LanceNamespaceDBConnection(DBConnection):
json_schema = _convert_pyarrow_schema_to_json(schema) json_schema = _convert_pyarrow_schema_to_json(schema)
# Create table request with namespace # Create table request with namespace
table_id = namespace + [name] table_id = (namespace or []) + [name]
request = CreateTableRequest(id=table_id, var_schema=json_schema) request = CreateTableRequest(id=table_id, var_schema=json_schema)
# Create empty Arrow IPC stream bytes # Create empty Arrow IPC stream bytes
@@ -220,7 +219,7 @@ class LanceNamespaceDBConnection(DBConnection):
storage_options: Optional[Dict[str, str]] = None, storage_options: Optional[Dict[str, str]] = None,
index_cache_size: Optional[int] = None, index_cache_size: Optional[int] = None,
) -> Table: ) -> Table:
table_id = namespace + [name] table_id = (namespace or []) + [name]
request = DescribeTableRequest(id=table_id) request = DescribeTableRequest(id=table_id)
response = self._ns.describe_table(request) response = self._ns.describe_table(request)
@@ -237,9 +236,9 @@ class LanceNamespaceDBConnection(DBConnection):
) )
@override @override
def drop_table(self, name: str, namespace: List[str] = []): def drop_table(self, name: str, namespace: Optional[List[str]] = None):
# Use namespace drop_table directly # Use namespace drop_table directly
table_id = namespace + [name] table_id = (namespace or []) + [name]
request = DropTableRequest(id=table_id) request = DropTableRequest(id=table_id)
self._ns.drop_table(request) self._ns.drop_table(request)
@@ -248,8 +247,8 @@ class LanceNamespaceDBConnection(DBConnection):
self, self,
cur_name: str, cur_name: str,
new_name: str, new_name: str,
cur_namespace: List[str] = [], cur_namespace: Optional[List[str]] = None,
new_namespace: List[str] = [], new_namespace: Optional[List[str]] = None,
): ):
raise NotImplementedError( raise NotImplementedError(
"rename_table is not supported for namespace connections" "rename_table is not supported for namespace connections"
@@ -262,7 +261,7 @@ class LanceNamespaceDBConnection(DBConnection):
) )
@override @override
def drop_all_tables(self, namespace: List[str] = []): def drop_all_tables(self, namespace: Optional[List[str]] = None):
for table_name in self.table_names(namespace=namespace): for table_name in self.table_names(namespace=namespace):
self.drop_table(table_name, namespace=namespace) self.drop_table(table_name, namespace=namespace)

View File

@@ -943,22 +943,20 @@ class LanceQueryBuilder(ABC):
>>> query = [100, 100] >>> query = [100, 100]
>>> plan = table.search(query).analyze_plan() >>> plan = table.search(query).analyze_plan()
>>> print(plan) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE >>> print(plan) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
AnalyzeExec verbose=true, metrics=[], cumulative_cpu=... AnalyzeExec verbose=true, metrics=[]
TracedExec, metrics=[], cumulative_cpu=... TracedExec, metrics=[]
ProjectionExec: expr=[...], metrics=[...], cumulative_cpu=... ProjectionExec: expr=[...], metrics=[...]
GlobalLimitExec: skip=0, fetch=10, metrics=[...], cumulative_cpu=... GlobalLimitExec: skip=0, fetch=10, metrics=[...]
FilterExec: _distance@2 IS NOT NULL, FilterExec: _distance@2 IS NOT NULL,
metrics=[output_rows=..., elapsed_compute=...], cumulative_cpu=... metrics=[output_rows=..., elapsed_compute=...]
SortExec: TopK(fetch=10), expr=[...], SortExec: TopK(fetch=10), expr=[...],
preserve_partitioning=[...], preserve_partitioning=[...],
metrics=[output_rows=..., elapsed_compute=..., row_replacements=...], metrics=[output_rows=..., elapsed_compute=..., row_replacements=...]
cumulative_cpu=...
KNNVectorDistance: metric=l2, KNNVectorDistance: metric=l2,
metrics=[output_rows=..., elapsed_compute=..., output_batches=...], metrics=[output_rows=..., elapsed_compute=..., output_batches=...]
cumulative_cpu=...
LanceRead: uri=..., projection=[vector], ... LanceRead: uri=..., projection=[vector], ...
metrics=[output_rows=..., elapsed_compute=..., metrics=[output_rows=..., elapsed_compute=...,
bytes_read=..., iops=..., requests=...], cumulative_cpu=... bytes_read=..., iops=..., requests=...]
Returns Returns
------- -------

View File

@@ -151,10 +151,9 @@ class RemoteDBConnection(DBConnection):
@override @override
def table_names( def table_names(
self, self,
namespace: List[str] = [],
page_token: Optional[str] = None, page_token: Optional[str] = None,
limit: int = 10, limit: int = 10,
*,
namespace: List[str] = [],
) -> Iterable[str]: ) -> Iterable[str]:
"""List the names of all tables in the database. """List the names of all tables in the database.
@@ -344,7 +343,7 @@ class RemoteDBConnection(DBConnection):
return RemoteTable(table, self.db_name) return RemoteTable(table, self.db_name)
@override @override
def drop_table(self, name: str, namespace: List[str] = []): def drop_table(self, name: str, namespace: Optional[List[str]] = None):
"""Drop a table from the database. """Drop a table from the database.
Parameters Parameters
@@ -362,8 +361,8 @@ class RemoteDBConnection(DBConnection):
self, self,
cur_name: str, cur_name: str,
new_name: str, new_name: str,
cur_namespace: List[str] = [], cur_namespace: Optional[List[str]] = None,
new_namespace: List[str] = [], new_namespace: Optional[List[str]] = None,
): ):
"""Rename a table in the database. """Rename a table in the database.

View File

@@ -175,18 +175,6 @@ def test_table_names(tmp_db: lancedb.DBConnection):
tmp_db.create_table("test3", data=data) tmp_db.create_table("test3", data=data)
assert tmp_db.table_names() == ["test1", "test2", "test3"] assert tmp_db.table_names() == ["test1", "test2", "test3"]
# Test that positional arguments for page_token and limit
result = list(tmp_db.table_names("test1", 1)) # page_token="test1", limit=1
assert result == ["test2"], f"Expected ['test2'], got {result}"
# Test mixed positional and keyword arguments
result = list(tmp_db.table_names("test2", limit=2))
assert result == ["test3"], f"Expected ['test3'], got {result}"
# Test that namespace parameter can be passed as keyword
result = list(tmp_db.table_names(namespace=[]))
assert len(result) == 3
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_table_names_async(tmp_path): async def test_table_names_async(tmp_path):

View File

@@ -420,10 +420,6 @@ class TestNamespaceConnection:
assert "table2" in table_names assert "table2" in table_names
assert len(table_names) == 1 assert len(table_names) == 1
# Test that drop_table works without explicit namespace parameter
db.drop_table("table2")
assert len(list(db.table_names())) == 0
# Should not be able to open dropped table # Should not be able to open dropped table
with pytest.raises(RuntimeError): with pytest.raises(RuntimeError):
db.open_table("table1") db.open_table("table1")
@@ -491,11 +487,6 @@ class TestNamespaceConnection:
# Verify all tables are gone # Verify all tables are gone
assert len(list(db.table_names())) == 0 assert len(list(db.table_names())) == 0
# Test that table_names works with keyword-only namespace parameter
db.create_table("test_table", schema=schema)
result = list(db.table_names(namespace=[]))
assert "test_table" in result
def test_table_operations(self): def test_table_operations(self):
"""Test various table operations through namespace.""" """Test various table operations through namespace."""
db = lancedb.connect_namespace("temp", {"root": self.temp_dir}) db = lancedb.connect_namespace("temp", {"root": self.temp_dir})

View File

@@ -1,6 +1,6 @@
[package] [package]
name = "lancedb" name = "lancedb"
version = "0.22.0-beta.0" version = "0.21.4-beta.1"
edition.workspace = true edition.workspace = true
description = "LanceDB: A serverless, low-latency vector database for AI applications" description = "LanceDB: A serverless, low-latency vector database for AI applications"
license.workspace = true license.workspace = true

View File

@@ -9,7 +9,7 @@ use futures::{stream::BoxStream, TryFutureExt};
use lance::io::WrappingObjectStore; use lance::io::WrappingObjectStore;
use object_store::{ use object_store::{
path::Path, Error, GetOptions, GetResult, ListResult, MultipartUpload, ObjectMeta, ObjectStore, path::Path, Error, GetOptions, GetResult, ListResult, MultipartUpload, ObjectMeta, ObjectStore,
PutMultipartOptions, PutOptions, PutPayload, PutResult, Result, UploadPart, PutMultipartOpts, PutOptions, PutPayload, PutResult, Result, UploadPart,
}; };
use async_trait::async_trait; use async_trait::async_trait;
@@ -73,7 +73,7 @@ impl ObjectStore for MirroringObjectStore {
async fn put_multipart_opts( async fn put_multipart_opts(
&self, &self,
location: &Path, location: &Path,
opts: PutMultipartOptions, opts: PutMultipartOpts,
) -> Result<Box<dyn MultipartUpload>> { ) -> Result<Box<dyn MultipartUpload>> {
if location.primary_only() { if location.primary_only() {
return self.primary.put_multipart_opts(location, opts).await; return self.primary.put_multipart_opts(location, opts).await;
@@ -170,11 +170,7 @@ impl MirroringObjectStoreWrapper {
} }
impl WrappingObjectStore for MirroringObjectStoreWrapper { impl WrappingObjectStore for MirroringObjectStoreWrapper {
fn wrap( fn wrap(&self, primary: Arc<dyn ObjectStore>) -> Arc<dyn ObjectStore> {
&self,
primary: Arc<dyn ObjectStore>,
_storage_options: Option<&std::collections::HashMap<String, String>>,
) -> Arc<dyn ObjectStore> {
Arc::new(MirroringObjectStore { Arc::new(MirroringObjectStore {
primary, primary,
secondary: self.secondary.clone(), secondary: self.secondary.clone(),

View File

@@ -11,7 +11,7 @@ use futures::stream::BoxStream;
use lance::io::WrappingObjectStore; use lance::io::WrappingObjectStore;
use object_store::{ use object_store::{
path::Path, GetOptions, GetResult, ListResult, MultipartUpload, ObjectMeta, ObjectStore, path::Path, GetOptions, GetResult, ListResult, MultipartUpload, ObjectMeta, ObjectStore,
PutMultipartOptions, PutOptions, PutPayload, PutResult, Result as OSResult, UploadPart, PutMultipartOpts, PutOptions, PutPayload, PutResult, Result as OSResult, UploadPart,
}; };
#[derive(Debug, Default)] #[derive(Debug, Default)]
@@ -50,11 +50,7 @@ impl IoStatsHolder {
} }
impl WrappingObjectStore for IoStatsHolder { impl WrappingObjectStore for IoStatsHolder {
fn wrap( fn wrap(&self, target: Arc<dyn ObjectStore>) -> Arc<dyn ObjectStore> {
&self,
target: Arc<dyn ObjectStore>,
_storage_options: Option<&std::collections::HashMap<String, String>>,
) -> Arc<dyn ObjectStore> {
Arc::new(IoTrackingStore { Arc::new(IoTrackingStore {
target, target,
stats: self.0.clone(), stats: self.0.clone(),
@@ -110,7 +106,7 @@ impl ObjectStore for IoTrackingStore {
async fn put_multipart_opts( async fn put_multipart_opts(
&self, &self,
location: &Path, location: &Path,
opts: PutMultipartOptions, opts: PutMultipartOpts,
) -> OSResult<Box<dyn MultipartUpload>> { ) -> OSResult<Box<dyn MultipartUpload>> {
let target = self.target.put_multipart_opts(location, opts).await?; let target = self.target.put_multipart_opts(location, opts).await?;
Ok(Box::new(IoTrackingMultipartUpload { Ok(Box::new(IoTrackingMultipartUpload {

View File

@@ -448,6 +448,15 @@ pub trait QueryBase {
/// the scores are converted to ranks and then normalized. If "Score", the /// the scores are converted to ranks and then normalized. If "Score", the
/// scores are normalized directly. /// scores are normalized directly.
fn norm(self, norm: NormalizeMethod) -> Self; fn norm(self, norm: NormalizeMethod) -> Self;
/// Disable autoprojection of scoring columns.
///
/// When an explicit projection is provided that does not include scoring
/// columns (e.g. `_score` for FTS or `_distance` for vector search), the
/// current default behavior is to auto-include those columns and emit a
/// deprecation warning. Calling this adopts the future behavior, in which
/// scoring columns are only returned if they are explicitly selected, and
/// avoids the warning.
fn disable_scoring_autoprojection(self) -> Self;
} }
pub trait HasQuery { pub trait HasQuery {
@@ -507,6 +516,11 @@ impl<T: HasQuery> QueryBase for T {
self.mut_query().norm = Some(norm); self.mut_query().norm = Some(norm);
self self
} }
/// Sets the `disable_scoring_autoprojection` flag on the underlying query
/// request and hands the builder back for further chaining.
fn disable_scoring_autoprojection(mut self) -> Self {
    let request = self.mut_query();
    request.disable_scoring_autoprojection = true;
    self
}
} }
/// Options for controlling the execution of a query /// Options for controlling the execution of a query
@@ -645,6 +659,10 @@ pub struct QueryRequest {
/// Configure how query results are normalized when doing hybrid search /// Configure how query results are normalized when doing hybrid search
pub norm: Option<NormalizeMethod>, pub norm: Option<NormalizeMethod>,
/// If true, do not auto-include scoring columns when they are
/// omitted from an explicit projection.
pub disable_scoring_autoprojection: bool,
} }
impl Default for QueryRequest { impl Default for QueryRequest {
@@ -660,6 +678,7 @@ impl Default for QueryRequest {
prefilter: true, prefilter: true,
reranker: None, reranker: None,
norm: None, norm: None,
disable_scoring_autoprojection: false,
} }
} }
} }

View File

@@ -372,6 +372,9 @@ impl<S: HttpSend> RemoteTable<S> {
params: &QueryRequest, params: &QueryRequest,
) -> Result<()> { ) -> Result<()> {
body["prefilter"] = params.prefilter.into(); body["prefilter"] = params.prefilter.into();
if params.disable_scoring_autoprojection {
body["disable_scoring_autoprojection"] = serde_json::Value::Bool(true);
}
if let Some(offset) = params.offset { if let Some(offset) = params.offset {
body["offset"] = serde_json::Value::Number(serde_json::Number::from(offset)); body["offset"] = serde_json::Value::Number(serde_json::Number::from(offset));
} }

View File

@@ -32,7 +32,7 @@ use lance::index::vector::VectorIndexParams;
use lance::io::WrappingObjectStore; use lance::io::WrappingObjectStore;
use lance_datafusion::exec::{analyze_plan as lance_analyze_plan, execute_plan}; use lance_datafusion::exec::{analyze_plan as lance_analyze_plan, execute_plan};
use lance_datafusion::utils::StreamingWriteSource; use lance_datafusion::utils::StreamingWriteSource;
use lance_index::scalar::{BuiltinIndexType, ScalarIndexParams}; use lance_index::scalar::{ScalarIndexParams, ScalarIndexType};
use lance_index::vector::hnsw::builder::HnswBuildParams; use lance_index::vector::hnsw::builder::HnswBuildParams;
use lance_index::vector::ivf::IvfBuildParams; use lance_index::vector::ivf::IvfBuildParams;
use lance_index::vector::pq::PQBuildParams; use lance_index::vector::pq::PQBuildParams;
@@ -1778,9 +1778,7 @@ impl NativeTable {
); );
Ok(Box::new(lance_idx_params)) Ok(Box::new(lance_idx_params))
} else if supported_btree_data_type(field.data_type()) { } else if supported_btree_data_type(field.data_type()) {
Ok(Box::new(ScalarIndexParams::for_builtin( Ok(Box::new(ScalarIndexParams::new(ScalarIndexType::BTree)))
BuiltinIndexType::BTree,
)))
} else { } else {
return Err(Error::InvalidInput { return Err(Error::InvalidInput {
message: format!( message: format!(
@@ -1793,21 +1791,15 @@ impl NativeTable {
} }
Index::BTree(_) => { Index::BTree(_) => {
Self::validate_index_type(field, "BTree", supported_btree_data_type)?; Self::validate_index_type(field, "BTree", supported_btree_data_type)?;
Ok(Box::new(ScalarIndexParams::for_builtin( Ok(Box::new(ScalarIndexParams::new(ScalarIndexType::BTree)))
BuiltinIndexType::BTree,
)))
} }
Index::Bitmap(_) => { Index::Bitmap(_) => {
Self::validate_index_type(field, "Bitmap", supported_bitmap_data_type)?; Self::validate_index_type(field, "Bitmap", supported_bitmap_data_type)?;
Ok(Box::new(ScalarIndexParams::for_builtin( Ok(Box::new(ScalarIndexParams::new(ScalarIndexType::Bitmap)))
BuiltinIndexType::Bitmap,
)))
} }
Index::LabelList(_) => { Index::LabelList(_) => {
Self::validate_index_type(field, "LabelList", supported_label_list_data_type)?; Self::validate_index_type(field, "LabelList", supported_label_list_data_type)?;
Ok(Box::new(ScalarIndexParams::for_builtin( Ok(Box::new(ScalarIndexParams::new(ScalarIndexType::LabelList)))
BuiltinIndexType::LabelList,
)))
} }
Index::FTS(fts_opts) => { Index::FTS(fts_opts) => {
Self::validate_index_type(field, "FTS", supported_fts_data_type)?; Self::validate_index_type(field, "FTS", supported_fts_data_type)?;
@@ -2339,6 +2331,10 @@ impl BaseTable for NativeTable {
scanner.full_text_search(fts.clone())?; scanner.full_text_search(fts.clone())?;
} }
if query.base.disable_scoring_autoprojection {
scanner.disable_scoring_autoprojection();
}
if let Some(refine_factor) = query.refine_factor { if let Some(refine_factor) = query.refine_factor {
scanner.refine(refine_factor); scanner.refine(refine_factor);
} }
@@ -3272,7 +3268,6 @@ mod tests {
fn wrap( fn wrap(
&self, &self,
original: Arc<dyn object_store::ObjectStore>, original: Arc<dyn object_store::ObjectStore>,
_storage_options: Option<&std::collections::HashMap<String, String>>,
) -> Arc<dyn object_store::ObjectStore> { ) -> Arc<dyn object_store::ObjectStore> {
self.called.store(true, Ordering::Relaxed); self.called.store(true, Ordering::Relaxed);
original original