mirror of
https://github.com/lancedb/lancedb.git
synced 2026-03-26 02:20:40 +00:00
Compare commits
14 Commits
python-v0.
...
codex/upda
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
1ade8846fd | ||
|
|
52ce2c995c | ||
|
|
e71a00998c | ||
|
|
39a2ac0a1c | ||
|
|
bc7b344fa4 | ||
|
|
f91d2f5fec | ||
|
|
cf81b6419f | ||
|
|
0498ac1f2f | ||
|
|
aeb1c3ee6a | ||
|
|
f9ae46c0e7 | ||
|
|
84bf022fb1 | ||
|
|
310967eceb | ||
|
|
154dbeee2a | ||
|
|
c9c08ac8b9 |
@@ -1,5 +1,5 @@
|
||||
[tool.bumpversion]
|
||||
current_version = "0.27.0-beta.1"
|
||||
current_version = "0.27.0-beta.3"
|
||||
parse = """(?x)
|
||||
(?P<major>0|[1-9]\\d*)\\.
|
||||
(?P<minor>0|[1-9]\\d*)\\.
|
||||
|
||||
@@ -29,6 +29,7 @@ runs:
|
||||
if: ${{ inputs.arm-build == 'false' }}
|
||||
uses: PyO3/maturin-action@v1
|
||||
with:
|
||||
maturin-version: "1.12.4"
|
||||
command: build
|
||||
working-directory: python
|
||||
docker-options: "-e PIP_EXTRA_INDEX_URL='https://pypi.fury.io/lance-format/ https://pypi.fury.io/lancedb/'"
|
||||
@@ -44,6 +45,7 @@ runs:
|
||||
if: ${{ inputs.arm-build == 'true' }}
|
||||
uses: PyO3/maturin-action@v1
|
||||
with:
|
||||
maturin-version: "1.12.4"
|
||||
command: build
|
||||
working-directory: python
|
||||
docker-options: "-e PIP_EXTRA_INDEX_URL='https://pypi.fury.io/lance-format/ https://pypi.fury.io/lancedb/'"
|
||||
|
||||
1
.github/workflows/build_mac_wheel/action.yml
vendored
1
.github/workflows/build_mac_wheel/action.yml
vendored
@@ -20,6 +20,7 @@ runs:
|
||||
uses: PyO3/maturin-action@v1
|
||||
with:
|
||||
command: build
|
||||
maturin-version: "1.12.4"
|
||||
# TODO: pass through interpreter
|
||||
args: ${{ inputs.args }}
|
||||
docker-options: "-e PIP_EXTRA_INDEX_URL='https://pypi.fury.io/lance-format/ https://pypi.fury.io/lancedb/'"
|
||||
|
||||
@@ -25,6 +25,7 @@ runs:
|
||||
uses: PyO3/maturin-action@v1
|
||||
with:
|
||||
command: build
|
||||
maturin-version: "1.12.4"
|
||||
args: ${{ inputs.args }}
|
||||
docker-options: "-e PIP_EXTRA_INDEX_URL='https://pypi.fury.io/lance-format/ https://pypi.fury.io/lancedb/'"
|
||||
working-directory: python
|
||||
|
||||
3
.github/workflows/npm-publish.yml
vendored
3
.github/workflows/npm-publish.yml
vendored
@@ -356,7 +356,8 @@ jobs:
|
||||
if [[ $DRY_RUN == "true" ]]; then
|
||||
ARGS="$ARGS --dry-run"
|
||||
fi
|
||||
if [[ $GITHUB_REF =~ refs/tags/v(.*)-beta.* ]]; then
|
||||
VERSION=$(node -p "require('./package.json').version")
|
||||
if [[ $VERSION == *-* ]]; then
|
||||
ARGS="$ARGS --tag preview"
|
||||
fi
|
||||
npm publish $ARGS
|
||||
|
||||
4
.github/workflows/python.yml
vendored
4
.github/workflows/python.yml
vendored
@@ -10,6 +10,10 @@ on:
|
||||
- python/**
|
||||
- rust/**
|
||||
- .github/workflows/python.yml
|
||||
- .github/workflows/build_linux_wheel/**
|
||||
- .github/workflows/build_mac_wheel/**
|
||||
- .github/workflows/build_windows_wheel/**
|
||||
- .github/workflows/run_tests/**
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
|
||||
|
||||
4
.github/workflows/rust.yml
vendored
4
.github/workflows/rust.yml
vendored
@@ -100,7 +100,9 @@ jobs:
|
||||
lfs: true
|
||||
- uses: Swatinem/rust-cache@v2
|
||||
- name: Install dependencies
|
||||
run: sudo apt install -y protobuf-compiler libssl-dev
|
||||
run: |
|
||||
sudo apt update
|
||||
sudo apt install -y protobuf-compiler libssl-dev
|
||||
- uses: rui314/setup-mold@v1
|
||||
- name: Make Swap
|
||||
run: |
|
||||
|
||||
520
Cargo.lock
generated
520
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
44
Cargo.toml
44
Cargo.toml
@@ -15,20 +15,20 @@ categories = ["database-implementations"]
|
||||
rust-version = "1.91.0"
|
||||
|
||||
[workspace.dependencies]
|
||||
lance = { "version" = "=3.1.0-beta.2", default-features = false, "tag" = "v3.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-core = { "version" = "=3.1.0-beta.2", "tag" = "v3.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-datagen = { "version" = "=3.1.0-beta.2", "tag" = "v3.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-file = { "version" = "=3.1.0-beta.2", "tag" = "v3.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-io = { "version" = "=3.1.0-beta.2", default-features = false, "tag" = "v3.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-index = { "version" = "=3.1.0-beta.2", "tag" = "v3.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-linalg = { "version" = "=3.1.0-beta.2", "tag" = "v3.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-namespace = { "version" = "=3.1.0-beta.2", "tag" = "v3.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-namespace-impls = { "version" = "=3.1.0-beta.2", default-features = false, "tag" = "v3.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-table = { "version" = "=3.1.0-beta.2", "tag" = "v3.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-testing = { "version" = "=3.1.0-beta.2", "tag" = "v3.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-datafusion = { "version" = "=3.1.0-beta.2", "tag" = "v3.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-encoding = { "version" = "=3.1.0-beta.2", "tag" = "v3.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-arrow = { "version" = "=3.1.0-beta.2", "tag" = "v3.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance = { "version" = "=4.0.0-beta.6", default-features = false, "tag" = "v4.0.0-beta.6", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-core = { "version" = "=4.0.0-beta.6", "tag" = "v4.0.0-beta.6", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-datagen = { "version" = "=4.0.0-beta.6", "tag" = "v4.0.0-beta.6", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-file = { "version" = "=4.0.0-beta.6", "tag" = "v4.0.0-beta.6", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-io = { "version" = "=4.0.0-beta.6", default-features = false, "tag" = "v4.0.0-beta.6", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-index = { "version" = "=4.0.0-beta.6", "tag" = "v4.0.0-beta.6", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-linalg = { "version" = "=4.0.0-beta.6", "tag" = "v4.0.0-beta.6", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-namespace = { "version" = "=4.0.0-beta.6", "tag" = "v4.0.0-beta.6", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-namespace-impls = { "version" = "=4.0.0-beta.6", default-features = false, "tag" = "v4.0.0-beta.6", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-table = { "version" = "=4.0.0-beta.6", "tag" = "v4.0.0-beta.6", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-testing = { "version" = "=4.0.0-beta.6", "tag" = "v4.0.0-beta.6", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-datafusion = { "version" = "=4.0.0-beta.6", "tag" = "v4.0.0-beta.6", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-encoding = { "version" = "=4.0.0-beta.6", "tag" = "v4.0.0-beta.6", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-arrow = { "version" = "=4.0.0-beta.6", "tag" = "v4.0.0-beta.6", "git" = "https://github.com/lance-format/lance.git" }
|
||||
ahash = "0.8"
|
||||
# Note that this one does not include pyarrow
|
||||
arrow = { version = "57.2", optional = false }
|
||||
@@ -40,13 +40,15 @@ arrow-schema = "57.2"
|
||||
arrow-select = "57.2"
|
||||
arrow-cast = "57.2"
|
||||
async-trait = "0"
|
||||
datafusion = { version = "51.0", default-features = false }
|
||||
datafusion-catalog = "51.0"
|
||||
datafusion-common = { version = "51.0", default-features = false }
|
||||
datafusion-execution = "51.0"
|
||||
datafusion-expr = "51.0"
|
||||
datafusion-physical-plan = "51.0"
|
||||
datafusion-physical-expr = "51.0"
|
||||
datafusion = { version = "52.1", default-features = false }
|
||||
datafusion-catalog = "52.1"
|
||||
datafusion-common = { version = "52.1", default-features = false }
|
||||
datafusion-execution = "52.1"
|
||||
datafusion-expr = "52.1"
|
||||
datafusion-functions = "52.1"
|
||||
datafusion-physical-plan = "52.1"
|
||||
datafusion-physical-expr = "52.1"
|
||||
datafusion-sql = "52.1"
|
||||
env_logger = "0.11"
|
||||
half = { "version" = "2.7.1", default-features = false, features = [
|
||||
"num-traits",
|
||||
|
||||
@@ -14,7 +14,7 @@ Add the following dependency to your `pom.xml`:
|
||||
<dependency>
|
||||
<groupId>com.lancedb</groupId>
|
||||
<artifactId>lancedb-core</artifactId>
|
||||
<version>0.27.0-beta.1</version>
|
||||
<version>0.27.0-beta.3</version>
|
||||
</dependency>
|
||||
```
|
||||
|
||||
|
||||
@@ -8,6 +8,14 @@
|
||||
|
||||
## Properties
|
||||
|
||||
### numDeletedRows
|
||||
|
||||
```ts
|
||||
numDeletedRows: number;
|
||||
```
|
||||
|
||||
***
|
||||
|
||||
### version
|
||||
|
||||
```ts
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
<parent>
|
||||
<groupId>com.lancedb</groupId>
|
||||
<artifactId>lancedb-parent</artifactId>
|
||||
<version>0.27.0-beta.1</version>
|
||||
<version>0.27.0-beta.3</version>
|
||||
<relativePath>../pom.xml</relativePath>
|
||||
</parent>
|
||||
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
|
||||
<groupId>com.lancedb</groupId>
|
||||
<artifactId>lancedb-parent</artifactId>
|
||||
<version>0.27.0-beta.1</version>
|
||||
<version>0.27.0-beta.3</version>
|
||||
<packaging>pom</packaging>
|
||||
<name>${project.artifactId}</name>
|
||||
<description>LanceDB Java SDK Parent POM</description>
|
||||
@@ -28,7 +28,7 @@
|
||||
<properties>
|
||||
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
||||
<arrow.version>15.0.0</arrow.version>
|
||||
<lance-core.version>3.1.0-beta.2</lance-core.version>
|
||||
<lance-core.version>4.0.0-beta.6</lance-core.version>
|
||||
<spotless.skip>false</spotless.skip>
|
||||
<spotless.version>2.30.0</spotless.version>
|
||||
<spotless.java.googlejavaformat.version>1.7</spotless.java.googlejavaformat.version>
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
[package]
|
||||
name = "lancedb-nodejs"
|
||||
edition.workspace = true
|
||||
version = "0.27.0-beta.1"
|
||||
version = "0.27.0-beta.3"
|
||||
license.workspace = true
|
||||
description.workspace = true
|
||||
repository.workspace = true
|
||||
|
||||
@@ -1697,6 +1697,65 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
|
||||
expect(results2[0].text).toBe(data[1].text);
|
||||
});
|
||||
|
||||
test("full text search fast search", async () => {
|
||||
const db = await connect(tmpDir.name);
|
||||
const data = [{ text: "hello world", vector: [0.1, 0.2, 0.3], id: 1 }];
|
||||
const table = await db.createTable("test", data);
|
||||
await table.createIndex("text", {
|
||||
config: Index.fts(),
|
||||
});
|
||||
|
||||
// Insert unindexed data after creating the index.
|
||||
await table.add([{ text: "xyz", vector: [0.4, 0.5, 0.6], id: 2 }]);
|
||||
|
||||
const withFlatSearch = await table
|
||||
.search("xyz", "fts")
|
||||
.limit(10)
|
||||
.toArray();
|
||||
expect(withFlatSearch.length).toBeGreaterThan(0);
|
||||
|
||||
const fastSearchResults = await table
|
||||
.search("xyz", "fts")
|
||||
.fastSearch()
|
||||
.limit(10)
|
||||
.toArray();
|
||||
expect(fastSearchResults.length).toBe(0);
|
||||
|
||||
const nearestToTextFastSearch = await table
|
||||
.query()
|
||||
.nearestToText("xyz")
|
||||
.fastSearch()
|
||||
.limit(10)
|
||||
.toArray();
|
||||
expect(nearestToTextFastSearch.length).toBe(0);
|
||||
|
||||
// fastSearch should be chainable with other methods.
|
||||
const chainedFastSearch = await table
|
||||
.search("xyz", "fts")
|
||||
.fastSearch()
|
||||
.select(["text"])
|
||||
.limit(5)
|
||||
.toArray();
|
||||
expect(chainedFastSearch.length).toBe(0);
|
||||
|
||||
await table.optimize();
|
||||
|
||||
const indexedFastSearch = await table
|
||||
.search("xyz", "fts")
|
||||
.fastSearch()
|
||||
.limit(10)
|
||||
.toArray();
|
||||
expect(indexedFastSearch.length).toBeGreaterThan(0);
|
||||
|
||||
const indexedNearestToTextFastSearch = await table
|
||||
.query()
|
||||
.nearestToText("xyz")
|
||||
.fastSearch()
|
||||
.limit(10)
|
||||
.toArray();
|
||||
expect(indexedNearestToTextFastSearch.length).toBeGreaterThan(0);
|
||||
});
|
||||
|
||||
test("prewarm full text search index", async () => {
|
||||
const db = await connect(tmpDir.name);
|
||||
const data = [
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-darwin-arm64",
|
||||
"version": "0.27.0-beta.1",
|
||||
"version": "0.27.0-beta.3",
|
||||
"os": ["darwin"],
|
||||
"cpu": ["arm64"],
|
||||
"main": "lancedb.darwin-arm64.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-arm64-gnu",
|
||||
"version": "0.27.0-beta.1",
|
||||
"version": "0.27.0-beta.3",
|
||||
"os": ["linux"],
|
||||
"cpu": ["arm64"],
|
||||
"main": "lancedb.linux-arm64-gnu.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-arm64-musl",
|
||||
"version": "0.27.0-beta.1",
|
||||
"version": "0.27.0-beta.3",
|
||||
"os": ["linux"],
|
||||
"cpu": ["arm64"],
|
||||
"main": "lancedb.linux-arm64-musl.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-x64-gnu",
|
||||
"version": "0.27.0-beta.1",
|
||||
"version": "0.27.0-beta.3",
|
||||
"os": ["linux"],
|
||||
"cpu": ["x64"],
|
||||
"main": "lancedb.linux-x64-gnu.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-x64-musl",
|
||||
"version": "0.27.0-beta.1",
|
||||
"version": "0.27.0-beta.3",
|
||||
"os": ["linux"],
|
||||
"cpu": ["x64"],
|
||||
"main": "lancedb.linux-x64-musl.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-win32-arm64-msvc",
|
||||
"version": "0.27.0-beta.1",
|
||||
"version": "0.27.0-beta.3",
|
||||
"os": [
|
||||
"win32"
|
||||
],
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-win32-x64-msvc",
|
||||
"version": "0.27.0-beta.1",
|
||||
"version": "0.27.0-beta.3",
|
||||
"os": ["win32"],
|
||||
"cpu": ["x64"],
|
||||
"main": "lancedb.win32-x64-msvc.node",
|
||||
|
||||
4
nodejs/package-lock.json
generated
4
nodejs/package-lock.json
generated
@@ -1,12 +1,12 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb",
|
||||
"version": "0.27.0-beta.1",
|
||||
"version": "0.27.0-beta.3",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "@lancedb/lancedb",
|
||||
"version": "0.27.0-beta.1",
|
||||
"version": "0.27.0-beta.3",
|
||||
"cpu": [
|
||||
"x64",
|
||||
"arm64"
|
||||
|
||||
@@ -11,7 +11,7 @@
|
||||
"ann"
|
||||
],
|
||||
"private": false,
|
||||
"version": "0.27.0-beta.1",
|
||||
"version": "0.27.0-beta.3",
|
||||
"main": "dist/index.js",
|
||||
"exports": {
|
||||
".": "./dist/index.js",
|
||||
|
||||
@@ -753,12 +753,14 @@ impl From<lancedb::table::AddResult> for AddResult {
|
||||
|
||||
#[napi(object)]
|
||||
pub struct DeleteResult {
|
||||
pub num_deleted_rows: i64,
|
||||
pub version: i64,
|
||||
}
|
||||
|
||||
impl From<lancedb::table::DeleteResult> for DeleteResult {
|
||||
fn from(value: lancedb::table::DeleteResult) -> Self {
|
||||
Self {
|
||||
num_deleted_rows: value.num_deleted_rows as i64,
|
||||
version: value.version as i64,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
[tool.bumpversion]
|
||||
current_version = "0.30.0-beta.2"
|
||||
current_version = "0.30.0-beta.3"
|
||||
parse = """(?x)
|
||||
(?P<major>0|[1-9]\\d*)\\.
|
||||
(?P<minor>0|[1-9]\\d*)\\.
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "lancedb-python"
|
||||
version = "0.30.0-beta.2"
|
||||
version = "0.30.0-beta.3"
|
||||
edition.workspace = true
|
||||
description = "Python bindings for LanceDB"
|
||||
license.workspace = true
|
||||
|
||||
@@ -59,7 +59,7 @@ tests = [
|
||||
"polars>=0.19, <=1.3.0",
|
||||
"tantivy",
|
||||
"pyarrow-stubs",
|
||||
"pylance>=1.0.0b14",
|
||||
"pylance>=1.0.0b14,<3.0.0",
|
||||
"requests",
|
||||
"datafusion<52",
|
||||
]
|
||||
|
||||
@@ -606,6 +606,7 @@ class LanceQueryBuilder(ABC):
|
||||
query,
|
||||
ordering_field_name=ordering_field_name,
|
||||
fts_columns=fts_columns,
|
||||
fast_search=fast_search,
|
||||
)
|
||||
|
||||
if isinstance(query, list):
|
||||
@@ -1456,12 +1457,14 @@ class LanceFtsQueryBuilder(LanceQueryBuilder):
|
||||
query: str | FullTextQuery,
|
||||
ordering_field_name: Optional[str] = None,
|
||||
fts_columns: Optional[Union[str, List[str]]] = None,
|
||||
fast_search: bool = None,
|
||||
):
|
||||
super().__init__(table)
|
||||
self._query = query
|
||||
self._phrase_query = False
|
||||
self.ordering_field_name = ordering_field_name
|
||||
self._reranker = None
|
||||
self._fast_search = fast_search
|
||||
if isinstance(fts_columns, str):
|
||||
fts_columns = [fts_columns]
|
||||
self._fts_columns = fts_columns
|
||||
@@ -1483,6 +1486,19 @@ class LanceFtsQueryBuilder(LanceQueryBuilder):
|
||||
self._phrase_query = phrase_query
|
||||
return self
|
||||
|
||||
def fast_search(self) -> LanceFtsQueryBuilder:
|
||||
"""
|
||||
Skip a flat search of unindexed data. This will improve
|
||||
search performance but search results will not include unindexed data.
|
||||
|
||||
Returns
|
||||
-------
|
||||
LanceFtsQueryBuilder
|
||||
The LanceFtsQueryBuilder object.
|
||||
"""
|
||||
self._fast_search = True
|
||||
return self
|
||||
|
||||
def to_query_object(self) -> Query:
|
||||
return Query(
|
||||
columns=self._columns,
|
||||
@@ -1494,6 +1510,7 @@ class LanceFtsQueryBuilder(LanceQueryBuilder):
|
||||
query=self._query, columns=self._fts_columns
|
||||
),
|
||||
offset=self._offset,
|
||||
fast_search=self._fast_search,
|
||||
)
|
||||
|
||||
def output_schema(self) -> pa.Schema:
|
||||
|
||||
@@ -218,8 +218,6 @@ class RemoteTable(Table):
|
||||
train: bool = True,
|
||||
):
|
||||
"""Create an index on the table.
|
||||
Currently, the only parameters that matter are
|
||||
the metric and the vector column name.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
@@ -250,11 +248,6 @@ class RemoteTable(Table):
|
||||
>>> table.create_index("l2", "vector") # doctest: +SKIP
|
||||
"""
|
||||
|
||||
if num_sub_vectors is not None:
|
||||
logging.warning(
|
||||
"num_sub_vectors is not supported on LanceDB cloud."
|
||||
"This parameter will be tuned automatically."
|
||||
)
|
||||
if accelerator is not None:
|
||||
logging.warning(
|
||||
"GPU accelerator is not yet supported on LanceDB cloud."
|
||||
|
||||
@@ -1331,7 +1331,7 @@ class Table(ABC):
|
||||
1 2 [3.0, 4.0]
|
||||
2 3 [5.0, 6.0]
|
||||
>>> table.delete("x = 2")
|
||||
DeleteResult(version=2)
|
||||
DeleteResult(num_deleted_rows=1, version=2)
|
||||
>>> table.to_pandas()
|
||||
x vector
|
||||
0 1 [1.0, 2.0]
|
||||
@@ -1345,7 +1345,7 @@ class Table(ABC):
|
||||
>>> to_remove
|
||||
'1, 5'
|
||||
>>> table.delete(f"x IN ({to_remove})")
|
||||
DeleteResult(version=3)
|
||||
DeleteResult(num_deleted_rows=1, version=3)
|
||||
>>> table.to_pandas()
|
||||
x vector
|
||||
0 3 [5.0, 6.0]
|
||||
@@ -4215,7 +4215,7 @@ class AsyncTable:
|
||||
1 2 [3.0, 4.0]
|
||||
2 3 [5.0, 6.0]
|
||||
>>> table.delete("x = 2")
|
||||
DeleteResult(version=2)
|
||||
DeleteResult(num_deleted_rows=1, version=2)
|
||||
>>> table.to_pandas()
|
||||
x vector
|
||||
0 1 [1.0, 2.0]
|
||||
@@ -4229,7 +4229,7 @@ class AsyncTable:
|
||||
>>> to_remove
|
||||
'1, 5'
|
||||
>>> table.delete(f"x IN ({to_remove})")
|
||||
DeleteResult(version=3)
|
||||
DeleteResult(num_deleted_rows=1, version=3)
|
||||
>>> table.to_pandas()
|
||||
x vector
|
||||
0 3 [5.0, 6.0]
|
||||
|
||||
@@ -27,6 +27,7 @@ from lancedb.query import (
|
||||
PhraseQuery,
|
||||
BooleanQuery,
|
||||
Occur,
|
||||
LanceFtsQueryBuilder,
|
||||
)
|
||||
import numpy as np
|
||||
import pyarrow as pa
|
||||
@@ -882,3 +883,109 @@ def test_fts_query_to_json():
|
||||
'"must_not":[]}}'
|
||||
)
|
||||
assert json_str == expected
|
||||
|
||||
|
||||
def test_fts_fast_search(table):
|
||||
table.create_fts_index("text", use_tantivy=False)
|
||||
|
||||
# Insert some unindexed data
|
||||
table.add(
|
||||
[
|
||||
{
|
||||
"text": "xyz",
|
||||
"vector": [0 for _ in range(128)],
|
||||
"id": 101,
|
||||
"text2": "xyz",
|
||||
"nested": {"text": "xyz"},
|
||||
"count": 10,
|
||||
}
|
||||
]
|
||||
)
|
||||
|
||||
# Without fast_search, the query object should not have fast_search set
|
||||
builder = table.search("xyz", query_type="fts").limit(10)
|
||||
query = builder.to_query_object()
|
||||
assert query.fast_search is None
|
||||
|
||||
# With fast_search, the query object should have fast_search=True
|
||||
builder = table.search("xyz", query_type="fts").fast_search().limit(10)
|
||||
query = builder.to_query_object()
|
||||
assert query.fast_search is True
|
||||
|
||||
# fast_search should be chainable with other methods
|
||||
builder = (
|
||||
table.search("xyz", query_type="fts").fast_search().select(["text"]).limit(5)
|
||||
)
|
||||
query = builder.to_query_object()
|
||||
assert query.fast_search is True
|
||||
assert query.limit == 5
|
||||
assert query.columns == ["text"]
|
||||
|
||||
# fast_search should be enabled by keyword argument too
|
||||
query = LanceFtsQueryBuilder(table, "xyz", fast_search=True).to_query_object()
|
||||
assert query.fast_search is True
|
||||
|
||||
# Verify it executes without error and skips unindexed data
|
||||
results = table.search("xyz", query_type="fts").fast_search().limit(5).to_list()
|
||||
assert len(results) == 0
|
||||
|
||||
# Update index and verify it returns results
|
||||
table.optimize()
|
||||
results = table.search("xyz", query_type="fts").fast_search().limit(5).to_list()
|
||||
assert len(results) > 0
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_fts_fast_search_async(async_table):
|
||||
await async_table.create_index("text", config=FTS())
|
||||
|
||||
# Insert some unindexed data
|
||||
await async_table.add(
|
||||
[
|
||||
{
|
||||
"text": "xyz",
|
||||
"vector": [0 for _ in range(128)],
|
||||
"id": 101,
|
||||
"text2": "xyz",
|
||||
"nested": {"text": "xyz"},
|
||||
"count": 10,
|
||||
}
|
||||
]
|
||||
)
|
||||
|
||||
# Without fast_search, should return results
|
||||
results = await async_table.query().nearest_to_text("xyz").limit(5).to_list()
|
||||
assert len(results) > 0
|
||||
|
||||
# With fast_search, should return no results data unindexed
|
||||
fast_results = (
|
||||
await async_table.query()
|
||||
.nearest_to_text("xyz")
|
||||
.fast_search()
|
||||
.limit(5)
|
||||
.to_list()
|
||||
)
|
||||
assert len(fast_results) == 0
|
||||
|
||||
# Update index and verify it returns results
|
||||
await async_table.optimize()
|
||||
|
||||
fast_results = (
|
||||
await async_table.query()
|
||||
.nearest_to_text("xyz")
|
||||
.fast_search()
|
||||
.limit(5)
|
||||
.to_list()
|
||||
)
|
||||
assert len(fast_results) > 0
|
||||
|
||||
# fast_search should be chainable with other methods
|
||||
results = (
|
||||
await async_table.query()
|
||||
.nearest_to_text("xyz")
|
||||
.fast_search()
|
||||
.select(["text"])
|
||||
.limit(5)
|
||||
.to_list()
|
||||
)
|
||||
assert len(results) > 0
|
||||
|
||||
@@ -71,7 +71,7 @@ impl StorageOptionsProvider for PyStorageOptionsProviderWrapper {
|
||||
"Failed to call fetch_storage_options: {}",
|
||||
e
|
||||
))),
|
||||
location: snafu::location!(),
|
||||
location: std::panic::Location::caller(),
|
||||
})?;
|
||||
|
||||
// If result is None, return None
|
||||
@@ -83,7 +83,7 @@ impl StorageOptionsProvider for PyStorageOptionsProviderWrapper {
|
||||
let result_dict = result.downcast::<PyDict>().map_err(|_| {
|
||||
lance_core::Error::InvalidInput {
|
||||
source: "fetch_storage_options() must return None or a dict of string key-value pairs".into(),
|
||||
location: snafu::location!(),
|
||||
location: std::panic::Location::caller(),
|
||||
}
|
||||
})?;
|
||||
|
||||
@@ -93,13 +93,13 @@ impl StorageOptionsProvider for PyStorageOptionsProviderWrapper {
|
||||
let key_str: String = key.extract().map_err(|e| {
|
||||
lance_core::Error::InvalidInput {
|
||||
source: format!("Storage option key must be a string: {}", e).into(),
|
||||
location: snafu::location!(),
|
||||
location: std::panic::Location::caller(),
|
||||
}
|
||||
})?;
|
||||
let value_str: String = value.extract().map_err(|e| {
|
||||
lance_core::Error::InvalidInput {
|
||||
source: format!("Storage option value must be a string: {}", e).into(),
|
||||
location: snafu::location!(),
|
||||
location: std::panic::Location::caller(),
|
||||
}
|
||||
})?;
|
||||
storage_options.insert(key_str, value_str);
|
||||
@@ -114,7 +114,7 @@ impl StorageOptionsProvider for PyStorageOptionsProviderWrapper {
|
||||
"Task join error: {}",
|
||||
e
|
||||
))),
|
||||
location: snafu::location!(),
|
||||
location: std::panic::Location::caller(),
|
||||
})?
|
||||
}
|
||||
|
||||
|
||||
@@ -112,19 +112,24 @@ impl From<lancedb::table::AddResult> for AddResult {
|
||||
#[pyclass(get_all)]
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct DeleteResult {
|
||||
pub num_deleted_rows: u64,
|
||||
pub version: u64,
|
||||
}
|
||||
|
||||
#[pymethods]
|
||||
impl DeleteResult {
|
||||
pub fn __repr__(&self) -> String {
|
||||
format!("DeleteResult(version={})", self.version)
|
||||
format!(
|
||||
"DeleteResult(num_deleted_rows={}, version={})",
|
||||
self.num_deleted_rows, self.version
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<lancedb::table::DeleteResult> for DeleteResult {
|
||||
fn from(result: lancedb::table::DeleteResult) -> Self {
|
||||
Self {
|
||||
num_deleted_rows: result.num_deleted_rows,
|
||||
version: result.version,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "lancedb"
|
||||
version = "0.27.0-beta.1"
|
||||
version = "0.27.0-beta.3"
|
||||
edition.workspace = true
|
||||
description = "LanceDB: A serverless, low-latency vector database for AI applications"
|
||||
license.workspace = true
|
||||
@@ -25,9 +25,9 @@ datafusion-catalog.workspace = true
|
||||
datafusion-common.workspace = true
|
||||
datafusion-execution.workspace = true
|
||||
datafusion-expr.workspace = true
|
||||
datafusion-functions = "51.0"
|
||||
datafusion-functions.workspace = true
|
||||
datafusion-physical-expr.workspace = true
|
||||
datafusion-sql = "51.0"
|
||||
datafusion-sql.workspace = true
|
||||
datafusion-physical-plan.workspace = true
|
||||
datafusion.workspace = true
|
||||
object_store = { workspace = true }
|
||||
|
||||
@@ -27,7 +27,7 @@
|
||||
///
|
||||
/// The btree index does not currently have any parameters though parameters such as the
|
||||
/// block size may be added in the future.
|
||||
#[derive(Default, Debug, Clone)]
|
||||
#[derive(Default, Debug, Clone, serde::Serialize)]
|
||||
pub struct BTreeIndexBuilder {}
|
||||
|
||||
impl BTreeIndexBuilder {}
|
||||
@@ -39,7 +39,7 @@ impl BTreeIndexBuilder {}
|
||||
/// This index works best for low-cardinality (i.e., less than 1000 unique values) columns,
|
||||
/// where the number of unique values is small.
|
||||
/// The bitmap stores a list of row ids where the value is present.
|
||||
#[derive(Debug, Clone, Default)]
|
||||
#[derive(Debug, Clone, Default, serde::Serialize)]
|
||||
pub struct BitmapIndexBuilder {}
|
||||
|
||||
/// Builder for LabelList index.
|
||||
@@ -48,7 +48,7 @@ pub struct BitmapIndexBuilder {}
|
||||
/// support queries with `array_contains_all` and `array_contains_any`
|
||||
/// using an underlying bitmap index.
|
||||
///
|
||||
#[derive(Debug, Clone, Default)]
|
||||
#[derive(Debug, Clone, Default, serde::Serialize)]
|
||||
pub struct LabelListIndexBuilder {}
|
||||
|
||||
pub use lance_index::scalar::inverted::query::*;
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
//! Vector indices are only supported on fixed-size-list (tensor) columns of floating point
|
||||
//! values
|
||||
use lance::table::format::{IndexMetadata, Manifest};
|
||||
use serde::Serialize;
|
||||
|
||||
use crate::DistanceType;
|
||||
|
||||
@@ -181,14 +182,17 @@ macro_rules! impl_hnsw_params_setter {
|
||||
/// The partitioning process is called IVF and the `num_partitions` parameter controls how many groups to create.
|
||||
///
|
||||
/// Note that training an IVF Flat index on a large dataset is a slow operation and currently is also a memory intensive operation.
|
||||
#[derive(Debug, Clone)]
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub struct IvfFlatIndexBuilder {
|
||||
#[serde(rename = "metric_type")]
|
||||
pub(crate) distance_type: DistanceType,
|
||||
|
||||
// IVF
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub(crate) num_partitions: Option<u32>,
|
||||
pub(crate) sample_rate: u32,
|
||||
pub(crate) max_iterations: u32,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub(crate) target_partition_size: Option<u32>,
|
||||
}
|
||||
|
||||
@@ -213,14 +217,17 @@ impl IvfFlatIndexBuilder {
|
||||
///
|
||||
/// This index compresses vectors using scalar quantization and groups them into IVF partitions.
|
||||
/// It offers a balance between search performance and storage footprint.
|
||||
#[derive(Debug, Clone)]
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub struct IvfSqIndexBuilder {
|
||||
#[serde(rename = "metric_type")]
|
||||
pub(crate) distance_type: DistanceType,
|
||||
|
||||
// IVF
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub(crate) num_partitions: Option<u32>,
|
||||
pub(crate) sample_rate: u32,
|
||||
pub(crate) max_iterations: u32,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub(crate) target_partition_size: Option<u32>,
|
||||
}
|
||||
|
||||
@@ -261,18 +268,23 @@ impl IvfSqIndexBuilder {
|
||||
///
|
||||
/// Note that training an IVF PQ index on a large dataset is a slow operation and
|
||||
/// currently is also a memory intensive operation.
|
||||
#[derive(Debug, Clone)]
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub struct IvfPqIndexBuilder {
|
||||
#[serde(rename = "metric_type")]
|
||||
pub(crate) distance_type: DistanceType,
|
||||
|
||||
// IVF
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub(crate) num_partitions: Option<u32>,
|
||||
pub(crate) sample_rate: u32,
|
||||
pub(crate) max_iterations: u32,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub(crate) target_partition_size: Option<u32>,
|
||||
|
||||
// PQ
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub(crate) num_sub_vectors: Option<u32>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub(crate) num_bits: Option<u32>,
|
||||
}
|
||||
|
||||
@@ -323,14 +335,18 @@ pub(crate) fn suggested_num_sub_vectors(dim: u32) -> u32 {
|
||||
///
|
||||
/// Note that training an IVF RQ index on a large dataset is a slow operation and
|
||||
/// currently is also a memory intensive operation.
|
||||
#[derive(Debug, Clone)]
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub struct IvfRqIndexBuilder {
|
||||
// IVF
|
||||
#[serde(rename = "metric_type")]
|
||||
pub(crate) distance_type: DistanceType,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub(crate) num_partitions: Option<u32>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub(crate) num_bits: Option<u32>,
|
||||
pub(crate) sample_rate: u32,
|
||||
pub(crate) max_iterations: u32,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub(crate) target_partition_size: Option<u32>,
|
||||
}
|
||||
|
||||
@@ -365,13 +381,16 @@ impl IvfRqIndexBuilder {
|
||||
/// quickly find the closest vectors to a query vector.
|
||||
///
|
||||
/// The PQ (product quantizer) is used to compress the vectors as the same as IVF PQ.
|
||||
#[derive(Debug, Clone)]
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub struct IvfHnswPqIndexBuilder {
|
||||
// IVF
|
||||
#[serde(rename = "metric_type")]
|
||||
pub(crate) distance_type: DistanceType,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub(crate) num_partitions: Option<u32>,
|
||||
pub(crate) sample_rate: u32,
|
||||
pub(crate) max_iterations: u32,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub(crate) target_partition_size: Option<u32>,
|
||||
|
||||
// HNSW
|
||||
@@ -379,7 +398,9 @@ pub struct IvfHnswPqIndexBuilder {
|
||||
pub(crate) ef_construction: u32,
|
||||
|
||||
// PQ
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub(crate) num_sub_vectors: Option<u32>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub(crate) num_bits: Option<u32>,
|
||||
}
|
||||
|
||||
@@ -415,13 +436,16 @@ impl IvfHnswPqIndexBuilder {
|
||||
///
|
||||
/// The SQ (scalar quantizer) is used to compress the vectors,
|
||||
/// each vector is mapped to a 8-bit integer vector, 4x compression ratio for float32 vector.
|
||||
#[derive(Debug, Clone)]
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub struct IvfHnswSqIndexBuilder {
|
||||
// IVF
|
||||
#[serde(rename = "metric_type")]
|
||||
pub(crate) distance_type: DistanceType,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub(crate) num_partitions: Option<u32>,
|
||||
pub(crate) sample_rate: u32,
|
||||
pub(crate) max_iterations: u32,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub(crate) target_partition_size: Option<u32>,
|
||||
|
||||
// HNSW
|
||||
|
||||
@@ -1227,7 +1227,10 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
|
||||
let body = response.text().await.err_to_http(request_id.clone())?;
|
||||
if body.trim().is_empty() {
|
||||
// Backward compatible with old servers
|
||||
return Ok(DeleteResult { version: 0 });
|
||||
return Ok(DeleteResult {
|
||||
num_deleted_rows: 0,
|
||||
version: 0,
|
||||
});
|
||||
}
|
||||
let delete_response: DeleteResult =
|
||||
serde_json::from_str(&body).map_err(|e| Error::Http {
|
||||
@@ -1273,73 +1276,24 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
|
||||
);
|
||||
}
|
||||
|
||||
match index.index {
|
||||
// TODO: Should we pass the actual index parameters? SaaS does not
|
||||
// yet support them.
|
||||
Index::IvfFlat(index) => {
|
||||
body[INDEX_TYPE_KEY] = serde_json::Value::String("IVF_FLAT".to_string());
|
||||
body[METRIC_TYPE_KEY] =
|
||||
serde_json::Value::String(index.distance_type.to_string().to_lowercase());
|
||||
if let Some(num_partitions) = index.num_partitions {
|
||||
body["num_partitions"] = serde_json::Value::Number(num_partitions.into());
|
||||
}
|
||||
}
|
||||
Index::IvfPq(index) => {
|
||||
body[INDEX_TYPE_KEY] = serde_json::Value::String("IVF_PQ".to_string());
|
||||
body[METRIC_TYPE_KEY] =
|
||||
serde_json::Value::String(index.distance_type.to_string().to_lowercase());
|
||||
if let Some(num_partitions) = index.num_partitions {
|
||||
body["num_partitions"] = serde_json::Value::Number(num_partitions.into());
|
||||
}
|
||||
if let Some(num_bits) = index.num_bits {
|
||||
body["num_bits"] = serde_json::Value::Number(num_bits.into());
|
||||
}
|
||||
}
|
||||
Index::IvfSq(index) => {
|
||||
body[INDEX_TYPE_KEY] = serde_json::Value::String("IVF_SQ".to_string());
|
||||
body[METRIC_TYPE_KEY] =
|
||||
serde_json::Value::String(index.distance_type.to_string().to_lowercase());
|
||||
if let Some(num_partitions) = index.num_partitions {
|
||||
body["num_partitions"] = serde_json::Value::Number(num_partitions.into());
|
||||
}
|
||||
}
|
||||
Index::IvfHnswSq(index) => {
|
||||
body[INDEX_TYPE_KEY] = serde_json::Value::String("IVF_HNSW_SQ".to_string());
|
||||
body[METRIC_TYPE_KEY] =
|
||||
serde_json::Value::String(index.distance_type.to_string().to_lowercase());
|
||||
if let Some(num_partitions) = index.num_partitions {
|
||||
body["num_partitions"] = serde_json::Value::Number(num_partitions.into());
|
||||
}
|
||||
}
|
||||
Index::IvfRq(index) => {
|
||||
body[INDEX_TYPE_KEY] = serde_json::Value::String("IVF_RQ".to_string());
|
||||
body[METRIC_TYPE_KEY] =
|
||||
serde_json::Value::String(index.distance_type.to_string().to_lowercase());
|
||||
if let Some(num_partitions) = index.num_partitions {
|
||||
body["num_partitions"] = serde_json::Value::Number(num_partitions.into());
|
||||
}
|
||||
if let Some(num_bits) = index.num_bits {
|
||||
body["num_bits"] = serde_json::Value::Number(num_bits.into());
|
||||
}
|
||||
}
|
||||
Index::BTree(_) => {
|
||||
body[INDEX_TYPE_KEY] = serde_json::Value::String("BTREE".to_string());
|
||||
}
|
||||
Index::Bitmap(_) => {
|
||||
body[INDEX_TYPE_KEY] = serde_json::Value::String("BITMAP".to_string());
|
||||
}
|
||||
Index::LabelList(_) => {
|
||||
body[INDEX_TYPE_KEY] = serde_json::Value::String("LABEL_LIST".to_string());
|
||||
}
|
||||
Index::FTS(fts) => {
|
||||
body[INDEX_TYPE_KEY] = serde_json::Value::String("FTS".to_string());
|
||||
let params = serde_json::to_value(&fts).map_err(|e| Error::InvalidInput {
|
||||
message: format!("failed to serialize FTS index params {:?}", e),
|
||||
})?;
|
||||
for (key, value) in params.as_object().unwrap() {
|
||||
body[key] = value.clone();
|
||||
}
|
||||
}
|
||||
fn to_json(params: &impl serde::Serialize) -> crate::Result<serde_json::Value> {
|
||||
serde_json::to_value(params).map_err(|e| Error::InvalidInput {
|
||||
message: format!("failed to serialize index params {:?}", e),
|
||||
})
|
||||
}
|
||||
|
||||
// Map each Index variant to its wire type name and serializable params.
|
||||
// Auto is special-cased since it needs schema inspection.
|
||||
let (index_type_str, params) = match &index.index {
|
||||
Index::IvfFlat(p) => ("IVF_FLAT", Some(to_json(p)?)),
|
||||
Index::IvfPq(p) => ("IVF_PQ", Some(to_json(p)?)),
|
||||
Index::IvfSq(p) => ("IVF_SQ", Some(to_json(p)?)),
|
||||
Index::IvfHnswSq(p) => ("IVF_HNSW_SQ", Some(to_json(p)?)),
|
||||
Index::IvfRq(p) => ("IVF_RQ", Some(to_json(p)?)),
|
||||
Index::BTree(p) => ("BTREE", Some(to_json(p)?)),
|
||||
Index::Bitmap(p) => ("BITMAP", Some(to_json(p)?)),
|
||||
Index::LabelList(p) => ("LABEL_LIST", Some(to_json(p)?)),
|
||||
Index::FTS(p) => ("FTS", Some(to_json(p)?)),
|
||||
Index::Auto => {
|
||||
let schema = self.schema().await?;
|
||||
let field = schema
|
||||
@@ -1348,11 +1302,11 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
|
||||
message: format!("Column {} not found in schema", column),
|
||||
})?;
|
||||
if supported_vector_data_type(field.data_type()) {
|
||||
body[INDEX_TYPE_KEY] = serde_json::Value::String("IVF_PQ".to_string());
|
||||
body[METRIC_TYPE_KEY] =
|
||||
serde_json::Value::String(DistanceType::L2.to_string().to_lowercase());
|
||||
("IVF_PQ", None)
|
||||
} else if supported_btree_data_type(field.data_type()) {
|
||||
body[INDEX_TYPE_KEY] = serde_json::Value::String("BTREE".to_string());
|
||||
("BTREE", None)
|
||||
} else {
|
||||
return Err(Error::NotSupported {
|
||||
message: format!(
|
||||
@@ -1370,6 +1324,13 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
|
||||
}
|
||||
};
|
||||
|
||||
body[INDEX_TYPE_KEY] = index_type_str.into();
|
||||
if let Some(params) = params {
|
||||
for (key, value) in params.as_object().expect("params should be a JSON object") {
|
||||
body[key] = value.clone();
|
||||
}
|
||||
}
|
||||
|
||||
let request = request.json(&body);
|
||||
|
||||
let (request_id, response) = self.send(request, true).await?;
|
||||
@@ -1830,7 +1791,9 @@ mod tests {
|
||||
use rstest::rstest;
|
||||
use serde_json::json;
|
||||
|
||||
use crate::index::vector::{IvfFlatIndexBuilder, IvfHnswSqIndexBuilder};
|
||||
use crate::index::vector::{
|
||||
IvfFlatIndexBuilder, IvfHnswSqIndexBuilder, IvfRqIndexBuilder, IvfSqIndexBuilder,
|
||||
};
|
||||
use crate::remote::db::DEFAULT_SERVER_VERSION;
|
||||
use crate::remote::JSON_CONTENT_TYPE;
|
||||
use crate::utils::background_cache::clock;
|
||||
@@ -2992,6 +2955,8 @@ mod tests {
|
||||
"IVF_FLAT",
|
||||
json!({
|
||||
"metric_type": "hamming",
|
||||
"sample_rate": 256,
|
||||
"max_iterations": 50,
|
||||
}),
|
||||
Index::IvfFlat(IvfFlatIndexBuilder::default().distance_type(DistanceType::Hamming)),
|
||||
),
|
||||
@@ -3000,6 +2965,8 @@ mod tests {
|
||||
json!({
|
||||
"metric_type": "hamming",
|
||||
"num_partitions": 128,
|
||||
"sample_rate": 256,
|
||||
"max_iterations": 50,
|
||||
}),
|
||||
Index::IvfFlat(
|
||||
IvfFlatIndexBuilder::default()
|
||||
@@ -3011,6 +2978,8 @@ mod tests {
|
||||
"IVF_PQ",
|
||||
json!({
|
||||
"metric_type": "l2",
|
||||
"sample_rate": 256,
|
||||
"max_iterations": 50,
|
||||
}),
|
||||
Index::IvfPq(Default::default()),
|
||||
),
|
||||
@@ -3020,6 +2989,8 @@ mod tests {
|
||||
"metric_type": "cosine",
|
||||
"num_partitions": 128,
|
||||
"num_bits": 4,
|
||||
"sample_rate": 256,
|
||||
"max_iterations": 50,
|
||||
}),
|
||||
Index::IvfPq(
|
||||
IvfPqIndexBuilder::default()
|
||||
@@ -3028,10 +2999,29 @@ mod tests {
|
||||
.num_bits(4),
|
||||
),
|
||||
),
|
||||
(
|
||||
"IVF_PQ",
|
||||
json!({
|
||||
"metric_type": "l2",
|
||||
"num_sub_vectors": 16,
|
||||
"sample_rate": 512,
|
||||
"max_iterations": 100,
|
||||
}),
|
||||
Index::IvfPq(
|
||||
IvfPqIndexBuilder::default()
|
||||
.num_sub_vectors(16)
|
||||
.sample_rate(512)
|
||||
.max_iterations(100),
|
||||
),
|
||||
),
|
||||
(
|
||||
"IVF_HNSW_SQ",
|
||||
json!({
|
||||
"metric_type": "l2",
|
||||
"sample_rate": 256,
|
||||
"max_iterations": 50,
|
||||
"m": 20,
|
||||
"ef_construction": 300,
|
||||
}),
|
||||
Index::IvfHnswSq(Default::default()),
|
||||
),
|
||||
@@ -3040,11 +3030,65 @@ mod tests {
|
||||
json!({
|
||||
"metric_type": "l2",
|
||||
"num_partitions": 128,
|
||||
"sample_rate": 256,
|
||||
"max_iterations": 50,
|
||||
"m": 40,
|
||||
"ef_construction": 500,
|
||||
}),
|
||||
Index::IvfHnswSq(
|
||||
IvfHnswSqIndexBuilder::default()
|
||||
.distance_type(DistanceType::L2)
|
||||
.num_partitions(128),
|
||||
.num_partitions(128)
|
||||
.num_edges(40)
|
||||
.ef_construction(500),
|
||||
),
|
||||
),
|
||||
(
|
||||
"IVF_SQ",
|
||||
json!({
|
||||
"metric_type": "l2",
|
||||
"sample_rate": 256,
|
||||
"max_iterations": 50,
|
||||
}),
|
||||
Index::IvfSq(Default::default()),
|
||||
),
|
||||
(
|
||||
"IVF_SQ",
|
||||
json!({
|
||||
"metric_type": "cosine",
|
||||
"num_partitions": 64,
|
||||
"sample_rate": 256,
|
||||
"max_iterations": 50,
|
||||
}),
|
||||
Index::IvfSq(
|
||||
IvfSqIndexBuilder::default()
|
||||
.distance_type(DistanceType::Cosine)
|
||||
.num_partitions(64),
|
||||
),
|
||||
),
|
||||
(
|
||||
"IVF_RQ",
|
||||
json!({
|
||||
"metric_type": "l2",
|
||||
"sample_rate": 256,
|
||||
"max_iterations": 50,
|
||||
}),
|
||||
Index::IvfRq(Default::default()),
|
||||
),
|
||||
(
|
||||
"IVF_RQ",
|
||||
json!({
|
||||
"metric_type": "cosine",
|
||||
"num_partitions": 64,
|
||||
"num_bits": 8,
|
||||
"sample_rate": 256,
|
||||
"max_iterations": 50,
|
||||
}),
|
||||
Index::IvfRq(
|
||||
IvfRqIndexBuilder::default()
|
||||
.distance_type(DistanceType::Cosine)
|
||||
.num_partitions(64)
|
||||
.num_bits(8),
|
||||
),
|
||||
),
|
||||
// HNSW_PQ isn't yet supported on SaaS
|
||||
|
||||
@@ -155,7 +155,9 @@ impl AddDataBuilder {
|
||||
|
||||
pub struct PreprocessingOutput {
|
||||
pub plan: Arc<dyn datafusion_physical_plan::ExecutionPlan>,
|
||||
#[cfg_attr(not(feature = "remote"), allow(dead_code))]
|
||||
pub overwrite: bool,
|
||||
#[cfg_attr(not(feature = "remote"), allow(dead_code))]
|
||||
pub rescannable: bool,
|
||||
pub write_options: WriteOptions,
|
||||
pub mode: AddDataMode,
|
||||
|
||||
@@ -7,6 +7,9 @@ use crate::Result;
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
|
||||
pub struct DeleteResult {
|
||||
/// The number of rows that were deleted.
|
||||
#[serde(default)]
|
||||
pub num_deleted_rows: u64,
|
||||
// The commit version associated with the operation.
|
||||
// A version of `0` indicates compatibility with legacy servers that do not return
|
||||
/// a commit version.
|
||||
@@ -20,10 +23,14 @@ pub struct DeleteResult {
|
||||
pub(crate) async fn execute_delete(table: &NativeTable, predicate: &str) -> Result<DeleteResult> {
|
||||
table.dataset.ensure_mutable()?;
|
||||
let mut dataset = (*table.dataset.get().await?).clone();
|
||||
dataset.delete(predicate).await?;
|
||||
let delete_result = dataset.delete(predicate).await?;
|
||||
let num_deleted_rows = delete_result.num_deleted_rows;
|
||||
let version = dataset.version().version;
|
||||
table.dataset.update(dataset);
|
||||
Ok(DeleteResult { version })
|
||||
Ok(DeleteResult {
|
||||
num_deleted_rows,
|
||||
version,
|
||||
})
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
@@ -108,6 +115,32 @@ mod tests {
|
||||
assert_eq!(current_schema, original_schema);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_delete_returns_num_deleted_rows() {
|
||||
let conn = connect("memory://").execute().await.unwrap();
|
||||
let batch = record_batch!(("id", Int32, [1, 2, 3, 4, 5])).unwrap();
|
||||
let table = conn
|
||||
.create_table("test_num_deleted", batch)
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// Delete 2 rows (id > 3 means id=4 and id=5)
|
||||
let result = table.delete("id > 3").await.unwrap();
|
||||
assert_eq!(result.num_deleted_rows, 2);
|
||||
assert_eq!(table.count_rows(None).await.unwrap(), 3);
|
||||
|
||||
// Delete 0 rows (no rows match)
|
||||
let result = table.delete("id > 100").await.unwrap();
|
||||
assert_eq!(result.num_deleted_rows, 0);
|
||||
assert_eq!(table.count_rows(None).await.unwrap(), 3);
|
||||
|
||||
// Delete remaining rows
|
||||
let result = table.delete("true").await.unwrap();
|
||||
assert_eq!(result.num_deleted_rows, 3);
|
||||
assert_eq!(table.count_rows(None).await.unwrap(), 0);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_delete_false_increments_version() {
|
||||
let conn = connect("memory://").execute().await.unwrap();
|
||||
|
||||
Reference in New Issue
Block a user