mirror of
https://github.com/lancedb/lancedb.git
synced 2025-12-27 15:12:53 +00:00
Compare commits
15 Commits
python-v0.
...
python-v0.
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
4231925476 | ||
|
|
84a6693294 | ||
|
|
6c2d4c10a4 | ||
|
|
d914722f79 | ||
|
|
a6e4034dba | ||
|
|
2616a50502 | ||
|
|
7b5e9d824a | ||
|
|
3b173e7cb9 | ||
|
|
d496ab13a0 | ||
|
|
69d9beebc7 | ||
|
|
d32360b99d | ||
|
|
9fa08bfa93 | ||
|
|
d6d9cb7415 | ||
|
|
990d93f553 | ||
|
|
0832cba3c6 |
@@ -1,5 +1,5 @@
|
|||||||
[tool.bumpversion]
|
[tool.bumpversion]
|
||||||
current_version = "0.13.1-beta.0"
|
current_version = "0.14.0-beta.0"
|
||||||
parse = """(?x)
|
parse = """(?x)
|
||||||
(?P<major>0|[1-9]\\d*)\\.
|
(?P<major>0|[1-9]\\d*)\\.
|
||||||
(?P<minor>0|[1-9]\\d*)\\.
|
(?P<minor>0|[1-9]\\d*)\\.
|
||||||
|
|||||||
14
.github/workflows/npm-publish.yml
vendored
14
.github/workflows/npm-publish.yml
vendored
@@ -133,7 +133,7 @@ jobs:
|
|||||||
free -h
|
free -h
|
||||||
- name: Build Linux Artifacts
|
- name: Build Linux Artifacts
|
||||||
run: |
|
run: |
|
||||||
bash ci/build_linux_artifacts.sh ${{ matrix.config.arch }}
|
bash ci/build_linux_artifacts.sh ${{ matrix.config.arch }} ${{ matrix.config.arch }}-unknown-linux-gnu
|
||||||
- name: Upload Linux Artifacts
|
- name: Upload Linux Artifacts
|
||||||
uses: actions/upload-artifact@v4
|
uses: actions/upload-artifact@v4
|
||||||
with:
|
with:
|
||||||
@@ -143,7 +143,7 @@ jobs:
|
|||||||
|
|
||||||
node-linux-musl:
|
node-linux-musl:
|
||||||
name: vectordb (${{ matrix.config.arch}}-unknown-linux-musl)
|
name: vectordb (${{ matrix.config.arch}}-unknown-linux-musl)
|
||||||
runs-on: ubuntu-latest
|
runs-on: ${{ matrix.config.runner }}
|
||||||
container: alpine:edge
|
container: alpine:edge
|
||||||
# Only runs on tags that matches the make-release action
|
# Only runs on tags that matches the make-release action
|
||||||
if: startsWith(github.ref, 'refs/tags/v')
|
if: startsWith(github.ref, 'refs/tags/v')
|
||||||
@@ -152,7 +152,10 @@ jobs:
|
|||||||
matrix:
|
matrix:
|
||||||
config:
|
config:
|
||||||
- arch: x86_64
|
- arch: x86_64
|
||||||
|
runner: ubuntu-latest
|
||||||
- arch: aarch64
|
- arch: aarch64
|
||||||
|
# For successful fat LTO builds, we need a large runner to avoid OOM errors.
|
||||||
|
runner: buildjet-16vcpu-ubuntu-2204-arm
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout
|
- name: Checkout
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
@@ -185,7 +188,7 @@ jobs:
|
|||||||
- name: Build Linux Artifacts
|
- name: Build Linux Artifacts
|
||||||
run: |
|
run: |
|
||||||
source ./saved_env
|
source ./saved_env
|
||||||
bash ci/manylinux_node/build_vectordb.sh ${{ matrix.config.arch }}
|
bash ci/manylinux_node/build_vectordb.sh ${{ matrix.config.arch }} ${{ matrix.config.arch }}-unknown-linux-musl
|
||||||
- name: Upload Linux Artifacts
|
- name: Upload Linux Artifacts
|
||||||
uses: actions/upload-artifact@v4
|
uses: actions/upload-artifact@v4
|
||||||
with:
|
with:
|
||||||
@@ -246,7 +249,7 @@ jobs:
|
|||||||
|
|
||||||
nodejs-linux-musl:
|
nodejs-linux-musl:
|
||||||
name: lancedb (${{ matrix.config.arch}}-unknown-linux-musl
|
name: lancedb (${{ matrix.config.arch}}-unknown-linux-musl
|
||||||
runs-on: ubuntu-latest
|
runs-on: ${{ matrix.config.runner }}
|
||||||
container: alpine:edge
|
container: alpine:edge
|
||||||
# Only runs on tags that matches the make-release action
|
# Only runs on tags that matches the make-release action
|
||||||
if: startsWith(github.ref, 'refs/tags/v')
|
if: startsWith(github.ref, 'refs/tags/v')
|
||||||
@@ -255,7 +258,10 @@ jobs:
|
|||||||
matrix:
|
matrix:
|
||||||
config:
|
config:
|
||||||
- arch: x86_64
|
- arch: x86_64
|
||||||
|
runner: ubuntu-latest
|
||||||
- arch: aarch64
|
- arch: aarch64
|
||||||
|
# For successful fat LTO builds, we need a large runner to avoid OOM errors.
|
||||||
|
runner: buildjet-16vcpu-ubuntu-2204-arm
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout
|
- name: Checkout
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
|
|||||||
15
Cargo.toml
15
Cargo.toml
@@ -23,13 +23,14 @@ rust-version = "1.80.0" # TO
|
|||||||
[workspace.dependencies]
|
[workspace.dependencies]
|
||||||
lance = { "version" = "=0.20.0", "features" = [
|
lance = { "version" = "=0.20.0", "features" = [
|
||||||
"dynamodb",
|
"dynamodb",
|
||||||
], git = "https://github.com/lancedb/lance.git", tag = "v0.20.0-beta.2" }
|
], git = "https://github.com/lancedb/lance.git", tag = "v0.20.0-beta.3" }
|
||||||
lance-index = { version = "=0.20.0", git = "https://github.com/lancedb/lance.git", tag = "v0.20.0-beta.2" }
|
lance-io = { version = "=0.20.0", git = "https://github.com/lancedb/lance.git", tag = "v0.20.0-beta.3" }
|
||||||
lance-linalg = { version = "=0.20.0", git = "https://github.com/lancedb/lance.git", tag = "v0.20.0-beta.2" }
|
lance-index = { version = "=0.20.0", git = "https://github.com/lancedb/lance.git", tag = "v0.20.0-beta.3" }
|
||||||
lance-table = { version = "=0.20.0", git = "https://github.com/lancedb/lance.git", tag = "v0.20.0-beta.2" }
|
lance-linalg = { version = "=0.20.0", git = "https://github.com/lancedb/lance.git", tag = "v0.20.0-beta.3" }
|
||||||
lance-testing = { version = "=0.20.0", git = "https://github.com/lancedb/lance.git", tag = "v0.20.0-beta.2" }
|
lance-table = { version = "=0.20.0", git = "https://github.com/lancedb/lance.git", tag = "v0.20.0-beta.3" }
|
||||||
lance-datafusion = { version = "=0.20.0", git = "https://github.com/lancedb/lance.git", tag = "v0.20.0-beta.2" }
|
lance-testing = { version = "=0.20.0", git = "https://github.com/lancedb/lance.git", tag = "v0.20.0-beta.3" }
|
||||||
lance-encoding = { version = "=0.20.0", git = "https://github.com/lancedb/lance.git", tag = "v0.20.0-beta.2" }
|
lance-datafusion = { version = "=0.20.0", git = "https://github.com/lancedb/lance.git", tag = "v0.20.0-beta.3" }
|
||||||
|
lance-encoding = { version = "=0.20.0", git = "https://github.com/lancedb/lance.git", tag = "v0.20.0-beta.3" }
|
||||||
# Note that this one does not include pyarrow
|
# Note that this one does not include pyarrow
|
||||||
arrow = { version = "52.2", optional = false }
|
arrow = { version = "52.2", optional = false }
|
||||||
arrow-array = "52.2"
|
arrow-array = "52.2"
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
set -e
|
set -e
|
||||||
ARCH=${1:-x86_64}
|
ARCH=${1:-x86_64}
|
||||||
|
TARGET_TRIPLE=${2:-x86_64-unknown-linux-gnu}
|
||||||
|
|
||||||
# We pass down the current user so that when we later mount the local files
|
# We pass down the current user so that when we later mount the local files
|
||||||
# into the container, the files are accessible by the current user.
|
# into the container, the files are accessible by the current user.
|
||||||
@@ -18,4 +19,4 @@ docker run \
|
|||||||
-v $(pwd):/io -w /io \
|
-v $(pwd):/io -w /io \
|
||||||
--memory-swap=-1 \
|
--memory-swap=-1 \
|
||||||
lancedb-node-manylinux \
|
lancedb-node-manylinux \
|
||||||
bash ci/manylinux_node/build_vectordb.sh $ARCH
|
bash ci/manylinux_node/build_vectordb.sh $ARCH $TARGET_TRIPLE
|
||||||
|
|||||||
@@ -2,6 +2,7 @@
|
|||||||
# Builds the node module for manylinux. Invoked by ci/build_linux_artifacts.sh.
|
# Builds the node module for manylinux. Invoked by ci/build_linux_artifacts.sh.
|
||||||
set -e
|
set -e
|
||||||
ARCH=${1:-x86_64}
|
ARCH=${1:-x86_64}
|
||||||
|
TARGET_TRIPLE=${2:-x86_64-unknown-linux-gnu}
|
||||||
|
|
||||||
if [ "$ARCH" = "x86_64" ]; then
|
if [ "$ARCH" = "x86_64" ]; then
|
||||||
export OPENSSL_LIB_DIR=/usr/local/lib64/
|
export OPENSSL_LIB_DIR=/usr/local/lib64/
|
||||||
@@ -17,4 +18,4 @@ FILE=$HOME/.bashrc && test -f $FILE && source $FILE
|
|||||||
cd node
|
cd node
|
||||||
npm ci
|
npm ci
|
||||||
npm run build-release
|
npm run build-release
|
||||||
npm run pack-build
|
npm run pack-build -t $TARGET_TRIPLE
|
||||||
|
|||||||
@@ -55,6 +55,9 @@ plugins:
|
|||||||
show_signature_annotations: true
|
show_signature_annotations: true
|
||||||
show_root_heading: true
|
show_root_heading: true
|
||||||
members_order: source
|
members_order: source
|
||||||
|
docstring_section_style: list
|
||||||
|
signature_crossrefs: true
|
||||||
|
separate_signature: true
|
||||||
import:
|
import:
|
||||||
# for cross references
|
# for cross references
|
||||||
- https://arrow.apache.org/docs/objects.inv
|
- https://arrow.apache.org/docs/objects.inv
|
||||||
|
|||||||
@@ -1,6 +1,16 @@
|
|||||||
# Python API Reference
|
# Python API Reference
|
||||||
|
|
||||||
This section contains the API reference for the OSS Python API.
|
This section contains the API reference for the Python API. There is a
|
||||||
|
synchronous and an asynchronous API client.
|
||||||
|
|
||||||
|
The general flow of using the API is:
|
||||||
|
|
||||||
|
1. Use [lancedb.connect][] or [lancedb.connect_async][] to connect to a database.
|
||||||
|
2. Use the returned [lancedb.DBConnection][] or [lancedb.AsyncConnection][] to
|
||||||
|
create or open tables.
|
||||||
|
3. Use the returned [lancedb.table.Table][] or [lancedb.AsyncTable][] to query
|
||||||
|
or modify tables.
|
||||||
|
|
||||||
|
|
||||||
## Installation
|
## Installation
|
||||||
|
|
||||||
|
|||||||
@@ -8,7 +8,7 @@
|
|||||||
<parent>
|
<parent>
|
||||||
<groupId>com.lancedb</groupId>
|
<groupId>com.lancedb</groupId>
|
||||||
<artifactId>lancedb-parent</artifactId>
|
<artifactId>lancedb-parent</artifactId>
|
||||||
<version>0.13.1-beta.0</version>
|
<version>0.14.0-beta.0</version>
|
||||||
<relativePath>../pom.xml</relativePath>
|
<relativePath>../pom.xml</relativePath>
|
||||||
</parent>
|
</parent>
|
||||||
|
|
||||||
|
|||||||
@@ -6,7 +6,7 @@
|
|||||||
|
|
||||||
<groupId>com.lancedb</groupId>
|
<groupId>com.lancedb</groupId>
|
||||||
<artifactId>lancedb-parent</artifactId>
|
<artifactId>lancedb-parent</artifactId>
|
||||||
<version>0.13.1-beta.0</version>
|
<version>0.14.0-beta.0</version>
|
||||||
<packaging>pom</packaging>
|
<packaging>pom</packaging>
|
||||||
|
|
||||||
<name>LanceDB Parent</name>
|
<name>LanceDB Parent</name>
|
||||||
|
|||||||
20
node/package-lock.json
generated
20
node/package-lock.json
generated
@@ -1,12 +1,12 @@
|
|||||||
{
|
{
|
||||||
"name": "vectordb",
|
"name": "vectordb",
|
||||||
"version": "0.13.1-beta.0",
|
"version": "0.14.0-beta.0",
|
||||||
"lockfileVersion": 3,
|
"lockfileVersion": 3,
|
||||||
"requires": true,
|
"requires": true,
|
||||||
"packages": {
|
"packages": {
|
||||||
"": {
|
"": {
|
||||||
"name": "vectordb",
|
"name": "vectordb",
|
||||||
"version": "0.13.1-beta.0",
|
"version": "0.14.0-beta.0",
|
||||||
"cpu": [
|
"cpu": [
|
||||||
"x64",
|
"x64",
|
||||||
"arm64"
|
"arm64"
|
||||||
@@ -52,14 +52,14 @@
|
|||||||
"uuid": "^9.0.0"
|
"uuid": "^9.0.0"
|
||||||
},
|
},
|
||||||
"optionalDependencies": {
|
"optionalDependencies": {
|
||||||
"@lancedb/vectordb-darwin-arm64": "0.13.1-beta.0",
|
"@lancedb/vectordb-darwin-arm64": "0.14.0-beta.0",
|
||||||
"@lancedb/vectordb-darwin-x64": "0.13.1-beta.0",
|
"@lancedb/vectordb-darwin-x64": "0.14.0-beta.0",
|
||||||
"@lancedb/vectordb-linux-arm64-gnu": "0.13.1-beta.0",
|
"@lancedb/vectordb-linux-arm64-gnu": "0.14.0-beta.0",
|
||||||
"@lancedb/vectordb-linux-arm64-musl": "0.13.1-beta.0",
|
"@lancedb/vectordb-linux-arm64-musl": "0.14.0-beta.0",
|
||||||
"@lancedb/vectordb-linux-x64-gnu": "0.13.1-beta.0",
|
"@lancedb/vectordb-linux-x64-gnu": "0.14.0-beta.0",
|
||||||
"@lancedb/vectordb-linux-x64-musl": "0.13.1-beta.0",
|
"@lancedb/vectordb-linux-x64-musl": "0.14.0-beta.0",
|
||||||
"@lancedb/vectordb-win32-arm64-msvc": "0.13.1-beta.0",
|
"@lancedb/vectordb-win32-arm64-msvc": "0.14.0-beta.0",
|
||||||
"@lancedb/vectordb-win32-x64-msvc": "0.13.1-beta.0"
|
"@lancedb/vectordb-win32-x64-msvc": "0.14.0-beta.0"
|
||||||
},
|
},
|
||||||
"peerDependencies": {
|
"peerDependencies": {
|
||||||
"@apache-arrow/ts": "^14.0.2",
|
"@apache-arrow/ts": "^14.0.2",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "vectordb",
|
"name": "vectordb",
|
||||||
"version": "0.13.1-beta.0",
|
"version": "0.14.0-beta.0",
|
||||||
"description": " Serverless, low-latency vector database for AI applications",
|
"description": " Serverless, low-latency vector database for AI applications",
|
||||||
"main": "dist/index.js",
|
"main": "dist/index.js",
|
||||||
"types": "dist/index.d.ts",
|
"types": "dist/index.d.ts",
|
||||||
@@ -91,13 +91,13 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"optionalDependencies": {
|
"optionalDependencies": {
|
||||||
"@lancedb/vectordb-darwin-x64": "0.13.1-beta.0",
|
"@lancedb/vectordb-darwin-x64": "0.14.0-beta.0",
|
||||||
"@lancedb/vectordb-darwin-arm64": "0.13.1-beta.0",
|
"@lancedb/vectordb-darwin-arm64": "0.14.0-beta.0",
|
||||||
"@lancedb/vectordb-linux-x64-gnu": "0.13.1-beta.0",
|
"@lancedb/vectordb-linux-x64-gnu": "0.14.0-beta.0",
|
||||||
"@lancedb/vectordb-linux-arm64-gnu": "0.13.1-beta.0",
|
"@lancedb/vectordb-linux-arm64-gnu": "0.14.0-beta.0",
|
||||||
"@lancedb/vectordb-linux-x64-musl": "0.13.1-beta.0",
|
"@lancedb/vectordb-linux-x64-musl": "0.14.0-beta.0",
|
||||||
"@lancedb/vectordb-linux-arm64-musl": "0.13.1-beta.0",
|
"@lancedb/vectordb-linux-arm64-musl": "0.14.0-beta.0",
|
||||||
"@lancedb/vectordb-win32-x64-msvc": "0.13.1-beta.0",
|
"@lancedb/vectordb-win32-x64-msvc": "0.14.0-beta.0",
|
||||||
"@lancedb/vectordb-win32-arm64-msvc": "0.13.1-beta.0"
|
"@lancedb/vectordb-win32-arm64-msvc": "0.14.0-beta.0"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "lancedb-nodejs"
|
name = "lancedb-nodejs"
|
||||||
edition.workspace = true
|
edition.workspace = true
|
||||||
version = "0.13.1-beta.0"
|
version = "0.14.0-beta.0"
|
||||||
license.workspace = true
|
license.workspace = true
|
||||||
description.workspace = true
|
description.workspace = true
|
||||||
repository.workspace = true
|
repository.workspace = true
|
||||||
|
|||||||
@@ -110,7 +110,10 @@ describe("given a connection", () => {
|
|||||||
let table = await db.createTable("test", data, { useLegacyFormat: true });
|
let table = await db.createTable("test", data, { useLegacyFormat: true });
|
||||||
|
|
||||||
const isV2 = async (table: Table) => {
|
const isV2 = async (table: Table) => {
|
||||||
const data = await table.query().toArrow({ maxBatchLength: 100000 });
|
const data = await table
|
||||||
|
.query()
|
||||||
|
.limit(10000)
|
||||||
|
.toArrow({ maxBatchLength: 100000 });
|
||||||
console.log(data.batches.length);
|
console.log(data.batches.length);
|
||||||
return data.batches.length < 5;
|
return data.batches.length < 5;
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -585,11 +585,11 @@ describe("When creating an index", () => {
|
|||||||
expect(fs.readdirSync(indexDir)).toHaveLength(1);
|
expect(fs.readdirSync(indexDir)).toHaveLength(1);
|
||||||
|
|
||||||
for await (const r of tbl.query().where("id > 1").select(["id"])) {
|
for await (const r of tbl.query().where("id > 1").select(["id"])) {
|
||||||
expect(r.numRows).toBe(298);
|
expect(r.numRows).toBe(10);
|
||||||
}
|
}
|
||||||
// should also work with 'filter' alias
|
// should also work with 'filter' alias
|
||||||
for await (const r of tbl.query().filter("id > 1").select(["id"])) {
|
for await (const r of tbl.query().filter("id > 1").select(["id"])) {
|
||||||
expect(r.numRows).toBe(298);
|
expect(r.numRows).toBe(10);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-darwin-arm64",
|
"name": "@lancedb/lancedb-darwin-arm64",
|
||||||
"version": "0.13.1-beta.0",
|
"version": "0.14.0-beta.0",
|
||||||
"os": ["darwin"],
|
"os": ["darwin"],
|
||||||
"cpu": ["arm64"],
|
"cpu": ["arm64"],
|
||||||
"main": "lancedb.darwin-arm64.node",
|
"main": "lancedb.darwin-arm64.node",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-darwin-x64",
|
"name": "@lancedb/lancedb-darwin-x64",
|
||||||
"version": "0.13.1-beta.0",
|
"version": "0.14.0-beta.0",
|
||||||
"os": ["darwin"],
|
"os": ["darwin"],
|
||||||
"cpu": ["x64"],
|
"cpu": ["x64"],
|
||||||
"main": "lancedb.darwin-x64.node",
|
"main": "lancedb.darwin-x64.node",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-linux-arm64-gnu",
|
"name": "@lancedb/lancedb-linux-arm64-gnu",
|
||||||
"version": "0.13.1-beta.0",
|
"version": "0.14.0-beta.0",
|
||||||
"os": ["linux"],
|
"os": ["linux"],
|
||||||
"cpu": ["arm64"],
|
"cpu": ["arm64"],
|
||||||
"main": "lancedb.linux-arm64-gnu.node",
|
"main": "lancedb.linux-arm64-gnu.node",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-linux-arm64-musl",
|
"name": "@lancedb/lancedb-linux-arm64-musl",
|
||||||
"version": "0.13.1-beta.0",
|
"version": "0.14.0-beta.0",
|
||||||
"os": ["linux"],
|
"os": ["linux"],
|
||||||
"cpu": ["arm64"],
|
"cpu": ["arm64"],
|
||||||
"main": "lancedb.linux-arm64-musl.node",
|
"main": "lancedb.linux-arm64-musl.node",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-linux-x64-gnu",
|
"name": "@lancedb/lancedb-linux-x64-gnu",
|
||||||
"version": "0.13.1-beta.0",
|
"version": "0.14.0-beta.0",
|
||||||
"os": ["linux"],
|
"os": ["linux"],
|
||||||
"cpu": ["x64"],
|
"cpu": ["x64"],
|
||||||
"main": "lancedb.linux-x64-gnu.node",
|
"main": "lancedb.linux-x64-gnu.node",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-linux-x64-musl",
|
"name": "@lancedb/lancedb-linux-x64-musl",
|
||||||
"version": "0.13.1-beta.0",
|
"version": "0.14.0-beta.0",
|
||||||
"os": ["linux"],
|
"os": ["linux"],
|
||||||
"cpu": ["x64"],
|
"cpu": ["x64"],
|
||||||
"main": "lancedb.linux-x64-musl.node",
|
"main": "lancedb.linux-x64-musl.node",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-win32-arm64-msvc",
|
"name": "@lancedb/lancedb-win32-arm64-msvc",
|
||||||
"version": "0.13.1-beta.0",
|
"version": "0.14.0-beta.0",
|
||||||
"os": [
|
"os": [
|
||||||
"win32"
|
"win32"
|
||||||
],
|
],
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-win32-x64-msvc",
|
"name": "@lancedb/lancedb-win32-x64-msvc",
|
||||||
"version": "0.13.1-beta.0",
|
"version": "0.14.0-beta.0",
|
||||||
"os": ["win32"],
|
"os": ["win32"],
|
||||||
"cpu": ["x64"],
|
"cpu": ["x64"],
|
||||||
"main": "lancedb.win32-x64-msvc.node",
|
"main": "lancedb.win32-x64-msvc.node",
|
||||||
|
|||||||
@@ -10,7 +10,7 @@
|
|||||||
"vector database",
|
"vector database",
|
||||||
"ann"
|
"ann"
|
||||||
],
|
],
|
||||||
"version": "0.13.1-beta.0",
|
"version": "0.14.0-beta.0",
|
||||||
"main": "dist/index.js",
|
"main": "dist/index.js",
|
||||||
"exports": {
|
"exports": {
|
||||||
".": "./dist/index.js",
|
".": "./dist/index.js",
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
[tool.bumpversion]
|
[tool.bumpversion]
|
||||||
current_version = "0.17.0-beta.0"
|
current_version = "0.17.0-beta.2"
|
||||||
parse = """(?x)
|
parse = """(?x)
|
||||||
(?P<major>0|[1-9]\\d*)\\.
|
(?P<major>0|[1-9]\\d*)\\.
|
||||||
(?P<minor>0|[1-9]\\d*)\\.
|
(?P<minor>0|[1-9]\\d*)\\.
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "lancedb-python"
|
name = "lancedb-python"
|
||||||
version = "0.17.0-beta.0"
|
version = "0.17.0-beta.2"
|
||||||
edition.workspace = true
|
edition.workspace = true
|
||||||
description = "Python bindings for LanceDB"
|
description = "Python bindings for LanceDB"
|
||||||
license.workspace = true
|
license.workspace = true
|
||||||
|
|||||||
@@ -3,7 +3,7 @@ name = "lancedb"
|
|||||||
# version in Cargo.toml
|
# version in Cargo.toml
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"deprecation",
|
"deprecation",
|
||||||
"pylance==0.20.0b2",
|
"pylance==0.20.0b3",
|
||||||
"tqdm>=4.27.0",
|
"tqdm>=4.27.0",
|
||||||
"pydantic>=1.10",
|
"pydantic>=1.10",
|
||||||
"packaging",
|
"packaging",
|
||||||
|
|||||||
@@ -1502,10 +1502,11 @@ class AsyncQueryBase(object):
|
|||||||
... print(plan)
|
... print(plan)
|
||||||
>>> asyncio.run(doctest_example()) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
|
>>> asyncio.run(doctest_example()) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
|
||||||
ProjectionExec: expr=[vector@0 as vector, _distance@2 as _distance]
|
ProjectionExec: expr=[vector@0 as vector, _distance@2 as _distance]
|
||||||
FilterExec: _distance@2 IS NOT NULL
|
GlobalLimitExec: skip=0, fetch=10
|
||||||
SortExec: TopK(fetch=10), expr=[_distance@2 ASC NULLS LAST], preserve_partitioning=[false]
|
FilterExec: _distance@2 IS NOT NULL
|
||||||
KNNVectorDistance: metric=l2
|
SortExec: TopK(fetch=10), expr=[_distance@2 ASC NULLS LAST], preserve_partitioning=[false]
|
||||||
LanceScan: uri=..., projection=[vector], row_id=true, row_addr=false, ordered=false
|
KNNVectorDistance: metric=l2
|
||||||
|
LanceScan: uri=..., projection=[vector], row_id=true, row_addr=false, ordered=false
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
|
|||||||
@@ -599,7 +599,9 @@ async def test_create_in_v2_mode(tmp_path):
|
|||||||
)
|
)
|
||||||
|
|
||||||
async def is_in_v2_mode(tbl):
|
async def is_in_v2_mode(tbl):
|
||||||
batches = await tbl.query().to_batches(max_batch_length=1024 * 10)
|
batches = (
|
||||||
|
await tbl.query().limit(10 * 1024).to_batches(max_batch_length=1024 * 10)
|
||||||
|
)
|
||||||
num_batches = 0
|
num_batches = 0
|
||||||
async for batch in batches:
|
async for batch in batches:
|
||||||
num_batches += 1
|
num_batches += 1
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "lancedb-node"
|
name = "lancedb-node"
|
||||||
version = "0.13.1-beta.0"
|
version = "0.14.0-beta.0"
|
||||||
description = "Serverless, low-latency vector database for AI applications"
|
description = "Serverless, low-latency vector database for AI applications"
|
||||||
license.workspace = true
|
license.workspace = true
|
||||||
edition.workspace = true
|
edition.workspace = true
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "lancedb"
|
name = "lancedb"
|
||||||
version = "0.13.1-beta.0"
|
version = "0.14.0-beta.0"
|
||||||
edition.workspace = true
|
edition.workspace = true
|
||||||
description = "LanceDB: A serverless, low-latency vector database for AI applications"
|
description = "LanceDB: A serverless, low-latency vector database for AI applications"
|
||||||
license.workspace = true
|
license.workspace = true
|
||||||
@@ -27,6 +27,7 @@ half = { workspace = true }
|
|||||||
lazy_static.workspace = true
|
lazy_static.workspace = true
|
||||||
lance = { workspace = true }
|
lance = { workspace = true }
|
||||||
lance-datafusion.workspace = true
|
lance-datafusion.workspace = true
|
||||||
|
lance-io = { workspace = true }
|
||||||
lance-index = { workspace = true }
|
lance-index = { workspace = true }
|
||||||
lance-table = { workspace = true }
|
lance-table = { workspace = true }
|
||||||
lance-linalg = { workspace = true }
|
lance-linalg = { workspace = true }
|
||||||
|
|||||||
@@ -38,6 +38,9 @@ use crate::table::{NativeTable, TableDefinition, WriteOptions};
|
|||||||
use crate::utils::validate_table_name;
|
use crate::utils::validate_table_name;
|
||||||
use crate::Table;
|
use crate::Table;
|
||||||
pub use lance_encoding::version::LanceFileVersion;
|
pub use lance_encoding::version::LanceFileVersion;
|
||||||
|
#[cfg(feature = "remote")]
|
||||||
|
use lance_io::object_store::StorageOptions;
|
||||||
|
use lance_table::io::commit::commit_handler_from_url;
|
||||||
|
|
||||||
pub const LANCE_FILE_EXTENSION: &str = "lance";
|
pub const LANCE_FILE_EXTENSION: &str = "lance";
|
||||||
|
|
||||||
@@ -133,7 +136,7 @@ impl IntoArrow for NoData {
|
|||||||
|
|
||||||
/// A builder for configuring a [`Connection::create_table`] operation
|
/// A builder for configuring a [`Connection::create_table`] operation
|
||||||
pub struct CreateTableBuilder<const HAS_DATA: bool, T: IntoArrow> {
|
pub struct CreateTableBuilder<const HAS_DATA: bool, T: IntoArrow> {
|
||||||
parent: Arc<dyn ConnectionInternal>,
|
pub(crate) parent: Arc<dyn ConnectionInternal>,
|
||||||
pub(crate) name: String,
|
pub(crate) name: String,
|
||||||
pub(crate) data: Option<T>,
|
pub(crate) data: Option<T>,
|
||||||
pub(crate) mode: CreateTableMode,
|
pub(crate) mode: CreateTableMode,
|
||||||
@@ -341,7 +344,7 @@ pub struct OpenTableBuilder {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl OpenTableBuilder {
|
impl OpenTableBuilder {
|
||||||
fn new(parent: Arc<dyn ConnectionInternal>, name: String) -> Self {
|
pub(crate) fn new(parent: Arc<dyn ConnectionInternal>, name: String) -> Self {
|
||||||
Self {
|
Self {
|
||||||
parent,
|
parent,
|
||||||
name,
|
name,
|
||||||
@@ -717,12 +720,14 @@ impl ConnectBuilder {
|
|||||||
message: "An api_key is required when connecting to LanceDb Cloud".to_string(),
|
message: "An api_key is required when connecting to LanceDb Cloud".to_string(),
|
||||||
})?;
|
})?;
|
||||||
|
|
||||||
|
let storage_options = StorageOptions(self.storage_options.clone());
|
||||||
let internal = Arc::new(crate::remote::db::RemoteDatabase::try_new(
|
let internal = Arc::new(crate::remote::db::RemoteDatabase::try_new(
|
||||||
&self.uri,
|
&self.uri,
|
||||||
&api_key,
|
&api_key,
|
||||||
®ion,
|
®ion,
|
||||||
self.host_override,
|
self.host_override,
|
||||||
self.client_config,
|
self.client_config,
|
||||||
|
storage_options.into(),
|
||||||
)?);
|
)?);
|
||||||
Ok(Connection {
|
Ok(Connection {
|
||||||
internal,
|
internal,
|
||||||
@@ -855,7 +860,7 @@ impl Database {
|
|||||||
let table_base_uri = if let Some(store) = engine {
|
let table_base_uri = if let Some(store) = engine {
|
||||||
static WARN_ONCE: std::sync::Once = std::sync::Once::new();
|
static WARN_ONCE: std::sync::Once = std::sync::Once::new();
|
||||||
WARN_ONCE.call_once(|| {
|
WARN_ONCE.call_once(|| {
|
||||||
log::warn!("Specifing engine is not a publicly supported feature in lancedb yet. THE API WILL CHANGE");
|
log::warn!("Specifying engine is not a publicly supported feature in lancedb yet. THE API WILL CHANGE");
|
||||||
});
|
});
|
||||||
let old_scheme = url.scheme().to_string();
|
let old_scheme = url.scheme().to_string();
|
||||||
let new_scheme = format!("{}+{}", old_scheme, store);
|
let new_scheme = format!("{}+{}", old_scheme, store);
|
||||||
@@ -1036,6 +1041,7 @@ impl ConnectionInternal for Database {
|
|||||||
};
|
};
|
||||||
|
|
||||||
let mut write_params = options.write_options.lance_write_params.unwrap_or_default();
|
let mut write_params = options.write_options.lance_write_params.unwrap_or_default();
|
||||||
|
|
||||||
if matches!(&options.mode, CreateTableMode::Overwrite) {
|
if matches!(&options.mode, CreateTableMode::Overwrite) {
|
||||||
write_params.mode = WriteMode::Overwrite;
|
write_params.mode = WriteMode::Overwrite;
|
||||||
}
|
}
|
||||||
@@ -1122,7 +1128,7 @@ impl ConnectionInternal for Database {
|
|||||||
let dir_name = format!("{}.{}", name, LANCE_EXTENSION);
|
let dir_name = format!("{}.{}", name, LANCE_EXTENSION);
|
||||||
let full_path = self.base_path.child(dir_name.clone());
|
let full_path = self.base_path.child(dir_name.clone());
|
||||||
self.object_store
|
self.object_store
|
||||||
.remove_dir_all(full_path)
|
.remove_dir_all(full_path.clone())
|
||||||
.await
|
.await
|
||||||
.map_err(|err| match err {
|
.map_err(|err| match err {
|
||||||
// this error is not lance::Error::DatasetNotFound,
|
// this error is not lance::Error::DatasetNotFound,
|
||||||
@@ -1132,6 +1138,19 @@ impl ConnectionInternal for Database {
|
|||||||
},
|
},
|
||||||
_ => Error::from(err),
|
_ => Error::from(err),
|
||||||
})?;
|
})?;
|
||||||
|
|
||||||
|
let object_store_params = ObjectStoreParams {
|
||||||
|
storage_options: Some(self.storage_options.clone()),
|
||||||
|
..Default::default()
|
||||||
|
};
|
||||||
|
let mut uri = self.uri.clone();
|
||||||
|
if let Some(query_string) = &self.query_string {
|
||||||
|
uri.push_str(&format!("?{}", query_string));
|
||||||
|
}
|
||||||
|
let commit_handler = commit_handler_from_url(&uri, &Some(object_store_params))
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
commit_handler.delete(&full_path).await.unwrap();
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1169,6 +1188,7 @@ mod tests {
|
|||||||
use lance_testing::datagen::{BatchGenerator, IncrementingInt32};
|
use lance_testing::datagen::{BatchGenerator, IncrementingInt32};
|
||||||
use tempfile::tempdir;
|
use tempfile::tempdir;
|
||||||
|
|
||||||
|
use crate::query::QueryBase;
|
||||||
use crate::query::{ExecutableQuery, QueryExecutionOptions};
|
use crate::query::{ExecutableQuery, QueryExecutionOptions};
|
||||||
|
|
||||||
use super::*;
|
use super::*;
|
||||||
@@ -1296,6 +1316,7 @@ mod tests {
|
|||||||
// In v1 the row group size will trump max_batch_length
|
// In v1 the row group size will trump max_batch_length
|
||||||
let batches = tbl
|
let batches = tbl
|
||||||
.query()
|
.query()
|
||||||
|
.limit(20000)
|
||||||
.execute_with_options(QueryExecutionOptions {
|
.execute_with_options(QueryExecutionOptions {
|
||||||
max_batch_length: 50000,
|
max_batch_length: 50000,
|
||||||
..Default::default()
|
..Default::default()
|
||||||
|
|||||||
@@ -596,7 +596,7 @@ impl Query {
|
|||||||
pub(crate) fn new(parent: Arc<dyn TableInternal>) -> Self {
|
pub(crate) fn new(parent: Arc<dyn TableInternal>) -> Self {
|
||||||
Self {
|
Self {
|
||||||
parent,
|
parent,
|
||||||
limit: None,
|
limit: Some(DEFAULT_TOP_K),
|
||||||
offset: None,
|
offset: None,
|
||||||
filter: None,
|
filter: None,
|
||||||
full_text_search: None,
|
full_text_search: None,
|
||||||
|
|||||||
@@ -21,6 +21,7 @@ use reqwest::{
|
|||||||
};
|
};
|
||||||
|
|
||||||
use crate::error::{Error, Result};
|
use crate::error::{Error, Result};
|
||||||
|
use crate::remote::db::RemoteOptions;
|
||||||
|
|
||||||
const REQUEST_ID_HEADER: &str = "x-request-id";
|
const REQUEST_ID_HEADER: &str = "x-request-id";
|
||||||
|
|
||||||
@@ -215,6 +216,7 @@ impl RestfulLanceDbClient<Sender> {
|
|||||||
region: &str,
|
region: &str,
|
||||||
host_override: Option<String>,
|
host_override: Option<String>,
|
||||||
client_config: ClientConfig,
|
client_config: ClientConfig,
|
||||||
|
options: &RemoteOptions,
|
||||||
) -> Result<Self> {
|
) -> Result<Self> {
|
||||||
let parsed_url = url::Url::parse(db_url).map_err(|err| Error::InvalidInput {
|
let parsed_url = url::Url::parse(db_url).map_err(|err| Error::InvalidInput {
|
||||||
message: format!("db_url is not a valid URL. '{db_url}'. Error: {err}"),
|
message: format!("db_url is not a valid URL. '{db_url}'. Error: {err}"),
|
||||||
@@ -255,6 +257,7 @@ impl RestfulLanceDbClient<Sender> {
|
|||||||
region,
|
region,
|
||||||
db_name,
|
db_name,
|
||||||
host_override.is_some(),
|
host_override.is_some(),
|
||||||
|
options,
|
||||||
)?)
|
)?)
|
||||||
.user_agent(client_config.user_agent)
|
.user_agent(client_config.user_agent)
|
||||||
.build()
|
.build()
|
||||||
@@ -262,6 +265,7 @@ impl RestfulLanceDbClient<Sender> {
|
|||||||
message: "Failed to build HTTP client".into(),
|
message: "Failed to build HTTP client".into(),
|
||||||
source: Some(Box::new(err)),
|
source: Some(Box::new(err)),
|
||||||
})?;
|
})?;
|
||||||
|
|
||||||
let host = match host_override {
|
let host = match host_override {
|
||||||
Some(host_override) => host_override,
|
Some(host_override) => host_override,
|
||||||
None => format!("https://{}.{}.api.lancedb.com", db_name, region),
|
None => format!("https://{}.{}.api.lancedb.com", db_name, region),
|
||||||
@@ -287,6 +291,7 @@ impl<S: HttpSend> RestfulLanceDbClient<S> {
|
|||||||
region: &str,
|
region: &str,
|
||||||
db_name: &str,
|
db_name: &str,
|
||||||
has_host_override: bool,
|
has_host_override: bool,
|
||||||
|
options: &RemoteOptions,
|
||||||
) -> Result<HeaderMap> {
|
) -> Result<HeaderMap> {
|
||||||
let mut headers = HeaderMap::new();
|
let mut headers = HeaderMap::new();
|
||||||
headers.insert(
|
headers.insert(
|
||||||
@@ -313,6 +318,23 @@ impl<S: HttpSend> RestfulLanceDbClient<S> {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if let Some(v) = options.0.get("account_name") {
|
||||||
|
headers.insert(
|
||||||
|
"x-azure-storage-account-name",
|
||||||
|
HeaderValue::from_str(v).map_err(|_| Error::InvalidInput {
|
||||||
|
message: format!("non-ascii storage account name '{}' provided", db_name),
|
||||||
|
})?,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
if let Some(v) = options.0.get("azure_storage_account_name") {
|
||||||
|
headers.insert(
|
||||||
|
"x-azure-storage-account-name",
|
||||||
|
HeaderValue::from_str(v).map_err(|_| Error::InvalidInput {
|
||||||
|
message: format!("non-ascii storage account name '{}' provided", db_name),
|
||||||
|
})?,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
Ok(headers)
|
Ok(headers)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -12,18 +12,21 @@
|
|||||||
// See the License for the specific language governing permissions and
|
// See the License for the specific language governing permissions and
|
||||||
// limitations under the License.
|
// limitations under the License.
|
||||||
|
|
||||||
|
use std::collections::HashMap;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
use arrow_array::RecordBatchReader;
|
use arrow_array::RecordBatchReader;
|
||||||
use async_trait::async_trait;
|
use async_trait::async_trait;
|
||||||
use http::StatusCode;
|
use http::StatusCode;
|
||||||
|
use lance_io::object_store::StorageOptions;
|
||||||
use moka::future::Cache;
|
use moka::future::Cache;
|
||||||
use reqwest::header::CONTENT_TYPE;
|
use reqwest::header::CONTENT_TYPE;
|
||||||
use serde::Deserialize;
|
use serde::Deserialize;
|
||||||
use tokio::task::spawn_blocking;
|
use tokio::task::spawn_blocking;
|
||||||
|
|
||||||
use crate::connection::{
|
use crate::connection::{
|
||||||
ConnectionInternal, CreateTableBuilder, NoData, OpenTableBuilder, TableNamesBuilder,
|
ConnectionInternal, CreateTableBuilder, CreateTableMode, NoData, OpenTableBuilder,
|
||||||
|
TableNamesBuilder,
|
||||||
};
|
};
|
||||||
use crate::embeddings::EmbeddingRegistry;
|
use crate::embeddings::EmbeddingRegistry;
|
||||||
use crate::error::Result;
|
use crate::error::Result;
|
||||||
@@ -52,9 +55,16 @@ impl RemoteDatabase {
|
|||||||
region: &str,
|
region: &str,
|
||||||
host_override: Option<String>,
|
host_override: Option<String>,
|
||||||
client_config: ClientConfig,
|
client_config: ClientConfig,
|
||||||
|
options: RemoteOptions,
|
||||||
) -> Result<Self> {
|
) -> Result<Self> {
|
||||||
let client =
|
let client = RestfulLanceDbClient::try_new(
|
||||||
RestfulLanceDbClient::try_new(uri, api_key, region, host_override, client_config)?;
|
uri,
|
||||||
|
api_key,
|
||||||
|
region,
|
||||||
|
host_override,
|
||||||
|
client_config,
|
||||||
|
&options,
|
||||||
|
)?;
|
||||||
|
|
||||||
let table_cache = Cache::builder()
|
let table_cache = Cache::builder()
|
||||||
.time_to_live(std::time::Duration::from_secs(300))
|
.time_to_live(std::time::Duration::from_secs(300))
|
||||||
@@ -95,6 +105,16 @@ impl<S: HttpSend> std::fmt::Display for RemoteDatabase<S> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl From<&CreateTableMode> for &'static str {
|
||||||
|
fn from(val: &CreateTableMode) -> Self {
|
||||||
|
match val {
|
||||||
|
CreateTableMode::Create => "create",
|
||||||
|
CreateTableMode::Overwrite => "overwrite",
|
||||||
|
CreateTableMode::ExistOk(_) => "exist_ok",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[async_trait]
|
#[async_trait]
|
||||||
impl<S: HttpSend> ConnectionInternal for RemoteDatabase<S> {
|
impl<S: HttpSend> ConnectionInternal for RemoteDatabase<S> {
|
||||||
async fn table_names(&self, options: TableNamesBuilder) -> Result<Vec<String>> {
|
async fn table_names(&self, options: TableNamesBuilder) -> Result<Vec<String>> {
|
||||||
@@ -133,14 +153,40 @@ impl<S: HttpSend> ConnectionInternal for RemoteDatabase<S> {
|
|||||||
let req = self
|
let req = self
|
||||||
.client
|
.client
|
||||||
.post(&format!("/v1/table/{}/create/", options.name))
|
.post(&format!("/v1/table/{}/create/", options.name))
|
||||||
|
.query(&[("mode", Into::<&str>::into(&options.mode))])
|
||||||
.body(data_buffer)
|
.body(data_buffer)
|
||||||
.header(CONTENT_TYPE, ARROW_STREAM_CONTENT_TYPE);
|
.header(CONTENT_TYPE, ARROW_STREAM_CONTENT_TYPE);
|
||||||
|
|
||||||
let (request_id, rsp) = self.client.send(req, false).await?;
|
let (request_id, rsp) = self.client.send(req, false).await?;
|
||||||
|
|
||||||
if rsp.status() == StatusCode::BAD_REQUEST {
|
if rsp.status() == StatusCode::BAD_REQUEST {
|
||||||
let body = rsp.text().await.err_to_http(request_id.clone())?;
|
let body = rsp.text().await.err_to_http(request_id.clone())?;
|
||||||
if body.contains("already exists") {
|
if body.contains("already exists") {
|
||||||
return Err(crate::Error::TableAlreadyExists { name: options.name });
|
return match options.mode {
|
||||||
|
CreateTableMode::Create => {
|
||||||
|
Err(crate::Error::TableAlreadyExists { name: options.name })
|
||||||
|
}
|
||||||
|
CreateTableMode::ExistOk(callback) => {
|
||||||
|
let builder = OpenTableBuilder::new(options.parent, options.name);
|
||||||
|
let builder = (callback)(builder);
|
||||||
|
builder.execute().await
|
||||||
|
}
|
||||||
|
|
||||||
|
// This should not happen, as we explicitly set the mode to overwrite and the server
|
||||||
|
// shouldn't return an error if the table already exists.
|
||||||
|
//
|
||||||
|
// However if the server is an older version that doesn't support the mode parameter,
|
||||||
|
// then we'll get the 400 response.
|
||||||
|
CreateTableMode::Overwrite => Err(crate::Error::Http {
|
||||||
|
source: format!(
|
||||||
|
"unexpected response from server for create mode overwrite: {}",
|
||||||
|
body
|
||||||
|
)
|
||||||
|
.into(),
|
||||||
|
request_id,
|
||||||
|
status_code: Some(StatusCode::BAD_REQUEST),
|
||||||
|
}),
|
||||||
|
};
|
||||||
} else {
|
} else {
|
||||||
return Err(crate::Error::InvalidInput { message: body });
|
return Err(crate::Error::InvalidInput { message: body });
|
||||||
}
|
}
|
||||||
@@ -206,6 +252,29 @@ impl<S: HttpSend> ConnectionInternal for RemoteDatabase<S> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// RemoteOptions contains a subset of StorageOptions that are compatible with Remote LanceDB connections
|
||||||
|
#[derive(Clone, Debug, Default)]
|
||||||
|
pub struct RemoteOptions(pub HashMap<String, String>);
|
||||||
|
|
||||||
|
impl RemoteOptions {
|
||||||
|
pub fn new(options: HashMap<String, String>) -> Self {
|
||||||
|
Self(options)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<StorageOptions> for RemoteOptions {
|
||||||
|
fn from(options: StorageOptions) -> Self {
|
||||||
|
let supported_opts = vec!["account_name", "azure_storage_account_name"];
|
||||||
|
let mut filtered = HashMap::new();
|
||||||
|
for opt in supported_opts {
|
||||||
|
if let Some(v) = options.0.get(opt) {
|
||||||
|
filtered.insert(opt.to_string(), v.to_string());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
RemoteOptions::new(filtered)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use std::sync::{Arc, OnceLock};
|
use std::sync::{Arc, OnceLock};
|
||||||
@@ -213,7 +282,9 @@ mod tests {
|
|||||||
use arrow_array::{Int32Array, RecordBatch, RecordBatchIterator};
|
use arrow_array::{Int32Array, RecordBatch, RecordBatchIterator};
|
||||||
use arrow_schema::{DataType, Field, Schema};
|
use arrow_schema::{DataType, Field, Schema};
|
||||||
|
|
||||||
|
use crate::connection::ConnectBuilder;
|
||||||
use crate::{
|
use crate::{
|
||||||
|
connection::CreateTableMode,
|
||||||
remote::{ARROW_STREAM_CONTENT_TYPE, JSON_CONTENT_TYPE},
|
remote::{ARROW_STREAM_CONTENT_TYPE, JSON_CONTENT_TYPE},
|
||||||
Connection, Error,
|
Connection, Error,
|
||||||
};
|
};
|
||||||
@@ -382,6 +453,73 @@ mod tests {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn test_create_table_modes() {
|
||||||
|
let test_cases = [
|
||||||
|
(None, "mode=create"),
|
||||||
|
(Some(CreateTableMode::Create), "mode=create"),
|
||||||
|
(Some(CreateTableMode::Overwrite), "mode=overwrite"),
|
||||||
|
(
|
||||||
|
Some(CreateTableMode::ExistOk(Box::new(|b| b))),
|
||||||
|
"mode=exist_ok",
|
||||||
|
),
|
||||||
|
];
|
||||||
|
|
||||||
|
for (mode, expected_query_string) in test_cases {
|
||||||
|
let conn = Connection::new_with_handler(move |request| {
|
||||||
|
assert_eq!(request.method(), &reqwest::Method::POST);
|
||||||
|
assert_eq!(request.url().path(), "/v1/table/table1/create/");
|
||||||
|
assert_eq!(request.url().query(), Some(expected_query_string));
|
||||||
|
|
||||||
|
http::Response::builder().status(200).body("").unwrap()
|
||||||
|
});
|
||||||
|
|
||||||
|
let data = RecordBatch::try_new(
|
||||||
|
Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, false)])),
|
||||||
|
vec![Arc::new(Int32Array::from(vec![1, 2, 3]))],
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
let reader = RecordBatchIterator::new([Ok(data.clone())], data.schema());
|
||||||
|
let mut builder = conn.create_table("table1", reader);
|
||||||
|
if let Some(mode) = mode {
|
||||||
|
builder = builder.mode(mode);
|
||||||
|
}
|
||||||
|
builder.execute().await.unwrap();
|
||||||
|
}
|
||||||
|
|
||||||
|
// check that the open table callback is called with exist_ok
|
||||||
|
let conn = Connection::new_with_handler(|request| match request.url().path() {
|
||||||
|
"/v1/table/table1/create/" => http::Response::builder()
|
||||||
|
.status(400)
|
||||||
|
.body("Table table1 already exists")
|
||||||
|
.unwrap(),
|
||||||
|
"/v1/table/table1/describe/" => http::Response::builder().status(200).body("").unwrap(),
|
||||||
|
_ => {
|
||||||
|
panic!("unexpected path: {:?}", request.url().path());
|
||||||
|
}
|
||||||
|
});
|
||||||
|
let data = RecordBatch::try_new(
|
||||||
|
Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, false)])),
|
||||||
|
vec![Arc::new(Int32Array::from(vec![1, 2, 3]))],
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let called: Arc<OnceLock<bool>> = Arc::new(OnceLock::new());
|
||||||
|
let reader = RecordBatchIterator::new([Ok(data.clone())], data.schema());
|
||||||
|
let called_in_cb = called.clone();
|
||||||
|
conn.create_table("table1", reader)
|
||||||
|
.mode(CreateTableMode::ExistOk(Box::new(move |b| {
|
||||||
|
called_in_cb.clone().set(true).unwrap();
|
||||||
|
b
|
||||||
|
})))
|
||||||
|
.execute()
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let called = *called.get().unwrap_or(&false);
|
||||||
|
assert!(called);
|
||||||
|
}
|
||||||
|
|
||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
async fn test_create_table_empty() {
|
async fn test_create_table_empty() {
|
||||||
let conn = Connection::new_with_handler(|request| {
|
let conn = Connection::new_with_handler(|request| {
|
||||||
@@ -436,4 +574,16 @@ mod tests {
|
|||||||
});
|
});
|
||||||
conn.rename_table("table1", "table2").await.unwrap();
|
conn.rename_table("table1", "table2").await.unwrap();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn test_connect_remote_options() {
|
||||||
|
let db_uri = "db://my-container/my-prefix";
|
||||||
|
let _ = ConnectBuilder::new(db_uri)
|
||||||
|
.region("us-east-1")
|
||||||
|
.api_key("my-api-key")
|
||||||
|
.storage_options(vec![("azure_storage_account_name", "my-storage-account")])
|
||||||
|
.execute()
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1227,6 +1227,7 @@ mod tests {
|
|||||||
"prefilter": true,
|
"prefilter": true,
|
||||||
"distance_type": "l2",
|
"distance_type": "l2",
|
||||||
"nprobes": 20,
|
"nprobes": 20,
|
||||||
|
"k": 10,
|
||||||
"ef": Option::<usize>::None,
|
"ef": Option::<usize>::None,
|
||||||
"refine_factor": null,
|
"refine_factor": null,
|
||||||
"version": null,
|
"version": null,
|
||||||
|
|||||||
Reference in New Issue
Block a user