mirror of https://github.com/lancedb/lancedb.git
synced 2026-01-10 05:42:58 +00:00

Compare commits: 13 commits (1176/alber...python-v0.)

| SHA1 |
|---|
| fb26f31beb |
| 7c138c54c4 |
| e9011b71b1 |
| 1b605ecc3b |
| bcc879b74a |
| fad0b76159 |
| 8364d589ab |
| 8687735bea |
| f0cd43da69 |
| 7b954c7e3e |
| 2579f29a92 |
| 7562b0fad1 |
| 83b6b0d28a |
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 0.4.15
+current_version = 0.4.16
 commit = True
 message = Bump version: {current_version} → {new_version}
 tag = True
@@ -14,6 +14,10 @@ inputs:
     # Note: this does *not* mean the host is arm64, since we might be cross-compiling.
     required: false
     default: "false"
+  manylinux:
+    description: "The manylinux version to build for"
+    required: false
+    default: "2_17"
 runs:
   using: "composite"
   steps:
@@ -28,7 +32,7 @@ runs:
         command: build
         working-directory: python
         target: x86_64-unknown-linux-gnu
-        manylinux: "2_17"
+        manylinux: ${{ inputs.manylinux }}
         args: ${{ inputs.args }}
         before-script-linux: |
           set -e
@@ -43,7 +47,7 @@ runs:
         command: build
         working-directory: python
         target: aarch64-unknown-linux-gnu
-        manylinux: "2_24"
+        manylinux: ${{ inputs.manylinux }}
         args: ${{ inputs.args }}
         before-script-linux: |
           set -e
.github/workflows/pypi-publish.yml (vendored): 23 changed lines
@@ -6,13 +6,23 @@ on:
 
 jobs:
   linux:
+    name: Python ${{ matrix.config.platform }} manylinux${{ matrix.config.manylinux }}
     timeout-minutes: 60
     strategy:
       matrix:
         python-minor-version: ["8"]
-        platform:
-          - x86_64
-          - aarch64
+        config:
+          - platform: x86_64
+            manylinux: "2_17"
+            extra_args: ""
+          - platform: x86_64
+            manylinux: "2_28"
+            extra_args: "--features fp16kernels"
+          - platform: aarch64
+            manylinux: "2_24"
+            extra_args: ""
+            # We don't build fp16 kernels for aarch64, because it uses
+            # cross compilation image, which doesn't have a new enough compiler.
     runs-on: "ubuntu-22.04"
     steps:
       - uses: actions/checkout@v4
@@ -26,8 +36,9 @@ jobs:
       - uses: ./.github/workflows/build_linux_wheel
         with:
           python-minor-version: ${{ matrix.python-minor-version }}
-          args: "--release --strip"
-          arm-build: ${{ matrix.platform == 'aarch64' }}
+          args: "--release --strip ${{ matrix.config.extra_args }}"
+          arm-build: ${{ matrix.config.platform == 'aarch64' }}
+          manylinux: ${{ matrix.config.manylinux }}
       - uses: ./.github/workflows/upload_wheel
         with:
           token: ${{ secrets.LANCEDB_PYPI_API_TOKEN }}
@@ -58,7 +69,7 @@ jobs:
       - uses: ./.github/workflows/build_mac_wheel
         with:
           python-minor-version: ${{ matrix.python-minor-version }}
-          args: "--release --strip --target ${{ matrix.config.target }}"
+          args: "--release --strip --target ${{ matrix.config.target }} --features fp16kernels"
       - uses: ./.github/workflows/upload_wheel
         with:
           python-minor-version: ${{ matrix.python-minor-version }}
.github/workflows/rust.yml (vendored): 8 changed lines
@@ -31,6 +31,10 @@ jobs:
       run:
         shell: bash
         working-directory: rust
+    env:
+      # Need up-to-date compilers for kernels
+      CC: gcc-12
+      CXX: g++-12
     steps:
       - uses: actions/checkout@v4
         with:
@@ -54,6 +58,10 @@ jobs:
       run:
         shell: bash
         working-directory: rust
+    env:
+      # Need up-to-date compilers for kernels
+      CC: gcc-12
+      CXX: g++-12
     steps:
       - uses: actions/checkout@v4
         with:
@@ -14,10 +14,10 @@ keywords = ["lancedb", "lance", "database", "vector", "search"]
 categories = ["database-implementations"]
 
 [workspace.dependencies]
-lance = { "version" = "=0.10.6", "features" = ["dynamodb"] }
-lance-index = { "version" = "=0.10.6" }
-lance-linalg = { "version" = "=0.10.6" }
-lance-testing = { "version" = "=0.10.6" }
+lance = { "version" = "=0.10.9", "features" = ["dynamodb"] }
+lance-index = { "version" = "=0.10.9" }
+lance-linalg = { "version" = "=0.10.9" }
+lance-testing = { "version" = "=0.10.9" }
 # Note that this one does not include pyarrow
 arrow = { version = "50.0", optional = false }
 arrow-array = "50.0"
@@ -66,6 +66,7 @@ Currently, Lance supports a growing list of SQL expressions.
 - `LIKE`, `NOT LIKE`
 - `CAST`
 - `regexp_match(column, pattern)`
+- [DataFusion Functions](https://arrow.apache.org/datafusion/user-guide/sql/scalar_functions.html)
 
 For example, the following filter string is acceptable:
 
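To make the new entry concrete, a filter string can combine the expressions listed above, including scalar functions, and be passed to the Node client's `filter`/`where` methods. A minimal sketch; the table handle `tbl` and the `emails` column are hypothetical, while `filter`, `limit`, and `execute` appear in the tests later in this compare:

```ts
// Hypothetical table and column; the filter mixes regexp_match with an
// arithmetic predicate, both listed in the supported-expressions doc above.
const results = await tbl
  .filter("regexp_match(emails, '.*@example\\.com') AND id % 2 = 0")
  .limit(100)
  .execute()
```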
node/package-lock.json (generated): 74 changed lines
@@ -1,12 +1,12 @@
 {
   "name": "vectordb",
-  "version": "0.4.15",
+  "version": "0.4.16",
   "lockfileVersion": 3,
   "requires": true,
   "packages": {
     "": {
       "name": "vectordb",
-      "version": "0.4.15",
+      "version": "0.4.16",
       "cpu": [
         "x64",
         "arm64"
@@ -52,11 +52,11 @@
         "uuid": "^9.0.0"
       },
       "optionalDependencies": {
-        "@lancedb/vectordb-darwin-arm64": "0.4.15",
-        "@lancedb/vectordb-darwin-x64": "0.4.15",
-        "@lancedb/vectordb-linux-arm64-gnu": "0.4.15",
-        "@lancedb/vectordb-linux-x64-gnu": "0.4.15",
-        "@lancedb/vectordb-win32-x64-msvc": "0.4.15"
+        "@lancedb/vectordb-darwin-arm64": "0.4.16",
+        "@lancedb/vectordb-darwin-x64": "0.4.16",
+        "@lancedb/vectordb-linux-arm64-gnu": "0.4.16",
+        "@lancedb/vectordb-linux-x64-gnu": "0.4.16",
+        "@lancedb/vectordb-win32-x64-msvc": "0.4.16"
       },
       "peerDependencies": {
         "@apache-arrow/ts": "^14.0.2",
@@ -333,66 +333,6 @@
         "@jridgewell/sourcemap-codec": "^1.4.10"
       }
     },
-    "node_modules/@lancedb/vectordb-darwin-arm64": {
-      "version": "0.4.15",
-      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.4.15.tgz",
-      "integrity": "sha512-asNVd0ojbExbj/iRCFu/+vpnnGtAHbrn7AjrL1PFeSl1JvsvzeZBBxq+WlM4UfGKfiJhkyBvQwh609OiCP3Snw==",
-      "cpu": [
-        "arm64"
-      ],
-      "optional": true,
-      "os": [
-        "darwin"
-      ]
-    },
-    "node_modules/@lancedb/vectordb-darwin-x64": {
-      "version": "0.4.15",
-      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.4.15.tgz",
-      "integrity": "sha512-tbMGb1P9KXdnoP6dqFnjhUUjGIVzzo2V/Ewc8iktMU1scCVQ7/rEPPvTh9jHuM1r1i+wVTNtKYijIVaHZZPJLA==",
-      "cpu": [
-        "x64"
-      ],
-      "optional": true,
-      "os": [
-        "darwin"
-      ]
-    },
-    "node_modules/@lancedb/vectordb-linux-arm64-gnu": {
-      "version": "0.4.15",
-      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.4.15.tgz",
-      "integrity": "sha512-5oiWvS9Y5mRc2PhaVzu9zoM1UFf77gDgX8IM95U87CZZdPAoREDvnbVhyIZa2SEZccEjLG7tShe+PJsOFWOT/w==",
-      "cpu": [
-        "arm64"
-      ],
-      "optional": true,
-      "os": [
-        "linux"
-      ]
-    },
-    "node_modules/@lancedb/vectordb-linux-x64-gnu": {
-      "version": "0.4.15",
-      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.4.15.tgz",
-      "integrity": "sha512-FjlrV2h97t5aoammRpPSBjws2cWSKYQpzALHm8Af8QjTuvo9J/MD63Fr/D6Sb9Ie2ER3pCiWD1o8UCFndCFtRg==",
-      "cpu": [
-        "x64"
-      ],
-      "optional": true,
-      "os": [
-        "linux"
-      ]
-    },
-    "node_modules/@lancedb/vectordb-win32-x64-msvc": {
-      "version": "0.4.15",
-      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.4.15.tgz",
-      "integrity": "sha512-o3zlY/FnY426kp2Y7xn4UbD6/Oeg5xqtezYNhhmt44lCmhSlpydx+2m9Fq1OGSDNUCWtrhMhgdHVnGTu2VTa5A==",
-      "cpu": [
-        "x64"
-      ],
-      "optional": true,
-      "os": [
-        "win32"
-      ]
-    },
     "node_modules/@neon-rs/cli": {
       "version": "0.0.160",
       "resolved": "https://registry.npmjs.org/@neon-rs/cli/-/cli-0.0.160.tgz",
@@ -1,6 +1,6 @@
 {
   "name": "vectordb",
-  "version": "0.4.15",
+  "version": "0.4.16",
   "description": " Serverless, low-latency vector database for AI applications",
   "main": "dist/index.js",
   "types": "dist/index.d.ts",
@@ -88,10 +88,10 @@
     }
   },
   "optionalDependencies": {
-    "@lancedb/vectordb-darwin-arm64": "0.4.15",
-    "@lancedb/vectordb-darwin-x64": "0.4.15",
-    "@lancedb/vectordb-linux-arm64-gnu": "0.4.15",
-    "@lancedb/vectordb-linux-x64-gnu": "0.4.15",
-    "@lancedb/vectordb-win32-x64-msvc": "0.4.15"
+    "@lancedb/vectordb-darwin-arm64": "0.4.16",
+    "@lancedb/vectordb-darwin-x64": "0.4.16",
+    "@lancedb/vectordb-linux-arm64-gnu": "0.4.16",
+    "@lancedb/vectordb-linux-x64-gnu": "0.4.16",
+    "@lancedb/vectordb-win32-x64-msvc": "0.4.16"
   }
 }
@@ -38,7 +38,7 @@ export class Query<T = number[]> {
   constructor (query?: T, tbl?: any, embeddings?: EmbeddingFunction<T>) {
     this._tbl = tbl
     this._query = query
-    this._limit = undefined
+    this._limit = 10
     this._nprobes = 20
     this._refineFactor = undefined
     this._select = undefined
@@ -50,6 +50,7 @@ export class Query<T = number[]> {
 
   /***
    * Sets the number of results that will be returned
+   * default value is 10
    * @param value number of results
    */
   limit (value: number): Query<T> {
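The practical effect of the new default: a query that never calls `limit` now returns at most 10 rows rather than the full result set, which is why the tests further down add an explicit `.limit(100)`. A minimal sketch, assuming a local `vectordb` table `tbl` whose filter matches more than 10 rows:

```ts
// With _limit defaulting to 10, this now returns at most 10 rows.
const firstTen = await tbl.filter('id % 2 = 0').execute()

// Callers that previously relied on "no limit" must now ask for one explicitly.
const firstHundred = await tbl.filter('id % 2 = 0').limit(100).execute()
```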
@@ -103,6 +103,18 @@ function toLanceRes (res: AxiosResponse): RemoteResponse {
   }
 }
 
+async function decodeErrorData (
+  res: RemoteResponse,
+  responseType?: ResponseType
+): Promise<string> {
+  const errorData = await res.body()
+  if (responseType === 'arraybuffer') {
+    return new TextDecoder().decode(errorData)
+  } else {
+    return errorData
+  }
+}
+
 export class HttpLancedbClient {
   private readonly _url: string
   private readonly _apiKey: () => string
@@ -180,7 +192,7 @@ export class HttpLancedbClient {
     }
 
     if (response.status !== 200) {
-      const errorData = new TextDecoder().decode(await response.body())
+      const errorData = await decodeErrorData(response)
       throw new Error(
         `Server Error, status: ${response.status}, ` +
         `message: ${response.statusText}: ${errorData}`
@@ -226,7 +238,7 @@ export class HttpLancedbClient {
     }
 
     if (response.status !== 200) {
-      const errorData = new TextDecoder().decode(await response.body())
+      const errorData = await decodeErrorData(response, responseType)
       throw new Error(
         `Server Error, status: ${response.status}, ` +
         `message: ${response.statusText}: ${errorData}`
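The reason for the helper: when a request was issued with `responseType: 'arraybuffer'`, the error body arrives as raw bytes, and interpolating those bytes into the message prints byte values instead of text. A standalone illustration (not part of the diff):

```ts
// Simulate a binary error body, as returned for responseType 'arraybuffer'.
const body = new TextEncoder().encode('{"error":"table not found"}')

console.log(`message: ${body}`)
// -> message: 123,34,101,114,114,... (raw byte values)

console.log(`message: ${new TextDecoder().decode(body)}`)
// -> message: {"error":"table not found"}
```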
@@ -156,7 +156,7 @@ export class RemoteConnection implements Connection {
     }
 
     const res = await this._client.post(
-      `/v1/table/${tableName}/create/`,
+      `/v1/table/${encodeURIComponent(tableName)}/create/`,
       buffer,
       undefined,
       'application/vnd.apache.arrow.stream'
@@ -177,7 +177,7 @@ export class RemoteConnection implements Connection {
   }
 
   async dropTable (name: string): Promise<void> {
-    await this._client.post(`/v1/table/${name}/drop/`)
+    await this._client.post(`/v1/table/${encodeURIComponent(name)}/drop/`)
   }
 
   withMiddleware (middleware: HttpMiddleware): Connection {
@@ -268,7 +268,7 @@ export class RemoteTable<T = number[]> implements Table<T> {
 
   get schema (): Promise<any> {
     return this._client
-      .post(`/v1/table/${this._name}/describe/`)
+      .post(`/v1/table/${encodeURIComponent(this._name)}/describe/`)
       .then(async (res) => {
         if (res.status !== 200) {
           throw new Error(
@@ -282,7 +282,7 @@ export class RemoteTable<T = number[]> implements Table<T> {
   }
 
   search (query: T): Query<T> {
-    return new RemoteQuery(query, this._client, this._name) //, this._embeddings_new)
+    return new RemoteQuery(query, this._client, encodeURIComponent(this._name)) //, this._embeddings_new)
   }
 
   filter (where: string): Query<T> {
@@ -324,7 +324,7 @@ export class RemoteTable<T = number[]> implements Table<T> {
 
     const buffer = await fromTableToStreamBuffer(tbl, this._embeddings)
     const res = await this._client.post(
-      `/v1/table/${this._name}/merge_insert/`,
+      `/v1/table/${encodeURIComponent(this._name)}/merge_insert/`,
       buffer,
       queryParams,
       'application/vnd.apache.arrow.stream'
@@ -348,7 +348,7 @@ export class RemoteTable<T = number[]> implements Table<T> {
 
     const buffer = await fromTableToStreamBuffer(tbl, this._embeddings)
     const res = await this._client.post(
-      `/v1/table/${this._name}/insert/`,
+      `/v1/table/${encodeURIComponent(this._name)}/insert/`,
      buffer,
       {
         mode: 'append'
@@ -374,7 +374,7 @@ export class RemoteTable<T = number[]> implements Table<T> {
     }
     const buffer = await fromTableToStreamBuffer(tbl, this._embeddings)
     const res = await this._client.post(
-      `/v1/table/${this._name}/insert/`,
+      `/v1/table/${encodeURIComponent(this._name)}/insert/`,
       buffer,
       {
         mode: 'overwrite'
@@ -421,7 +421,7 @@ export class RemoteTable<T = number[]> implements Table<T> {
       index_cache_size: indexCacheSize
     }
     const res = await this._client.post(
-      `/v1/table/${this._name}/create_index/`,
+      `/v1/table/${encodeURIComponent(this._name)}/create_index/`,
       data
     )
     if (res.status !== 200) {
@@ -442,7 +442,7 @@ export class RemoteTable<T = number[]> implements Table<T> {
       replace: true
     }
     const res = await this._client.post(
-      `/v1/table/${this._name}/create_scalar_index/`,
+      `/v1/table/${encodeURIComponent(this._name)}/create_scalar_index/`,
       data
     )
     if (res.status !== 200) {
@@ -455,14 +455,14 @@ export class RemoteTable<T = number[]> implements Table<T> {
   }
 
   async countRows (filter?: string): Promise<number> {
-    const result = await this._client.post(`/v1/table/${this._name}/count_rows/`, {
+    const result = await this._client.post(`/v1/table/${encodeURIComponent(this._name)}/count_rows/`, {
       predicate: filter
     })
-    return (await result.body())?.stats?.num_rows
+    return (await result.body())
   }
 
   async delete (filter: string): Promise<void> {
-    await this._client.post(`/v1/table/${this._name}/delete/`, {
+    await this._client.post(`/v1/table/${encodeURIComponent(this._name)}/delete/`, {
       predicate: filter
     })
   }
@@ -481,7 +481,7 @@ export class RemoteTable<T = number[]> implements Table<T> {
         updates[key] = toSQL(value)
       }
     }
-    await this._client.post(`/v1/table/${this._name}/update/`, {
+    await this._client.post(`/v1/table/${encodeURIComponent(this._name)}/update/`, {
       predicate: filter,
       updates: Object.entries(updates).map(([key, value]) => [key, value])
     })
@@ -489,7 +489,7 @@ export class RemoteTable<T = number[]> implements Table<T> {
 
   async listIndices (): Promise<VectorIndex[]> {
     const results = await this._client.post(
-      `/v1/table/${this._name}/index/list/`
+      `/v1/table/${encodeURIComponent(this._name)}/index/list/`
     )
     return (await results.body()).indexes?.map((index: any) => ({
       columns: index.columns,
@@ -500,7 +500,7 @@ export class RemoteTable<T = number[]> implements Table<T> {
 
   async indexStats (indexUuid: string): Promise<IndexStats> {
     const results = await this._client.post(
-      `/v1/table/${this._name}/index/${indexUuid}/stats/`
+      `/v1/table/${encodeURIComponent(this._name)}/index/${indexUuid}/stats/`
     )
     const body = await results.body()
     return {
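The motivation for wrapping each route segment in `encodeURIComponent`: a table name containing URL-significant characters would otherwise be spliced into the request path verbatim. A standalone illustration with a hypothetical table name:

```ts
const name = 'sales/2024 q1' // hypothetical table name

console.log(`/v1/table/${name}/describe/`)
// -> /v1/table/sales/2024 q1/describe/    (breaks into an extra path segment)

console.log(`/v1/table/${encodeURIComponent(name)}/describe/`)
// -> /v1/table/sales%2F2024%20q1/describe/ (one safely escaped segment)
```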
@@ -124,9 +124,9 @@ describe('LanceDB client', function () {
     const uri = await createTestDB(2, 100)
     const con = await lancedb.connect(uri)
     const table = (await con.openTable('vectors')) as LocalTable
-    let results = await table.filter('id % 2 = 0').execute()
+    let results = await table.filter('id % 2 = 0').limit(100).execute()
     assertResults(results)
-    results = await table.where('id % 2 = 0').execute()
+    results = await table.where('id % 2 = 0').limit(100).execute()
     assertResults(results)
 
     // Should reject a bad filter
@@ -1,6 +1,6 @@
 {
   "name": "@lancedb/lancedb-darwin-arm64",
-  "version": "0.4.15",
+  "version": "0.4.16",
   "os": [
     "darwin"
   ],
@@ -1,6 +1,6 @@
 {
   "name": "@lancedb/lancedb-darwin-x64",
-  "version": "0.4.15",
+  "version": "0.4.16",
   "os": [
     "darwin"
   ],
@@ -1,6 +1,6 @@
 {
   "name": "@lancedb/lancedb-linux-arm64-gnu",
-  "version": "0.4.15",
+  "version": "0.4.16",
   "os": [
     "linux"
   ],
@@ -1,6 +1,6 @@
 {
   "name": "@lancedb/lancedb-linux-x64-gnu",
-  "version": "0.4.15",
+  "version": "0.4.16",
   "os": [
     "linux"
   ],
@@ -1,6 +1,6 @@
 {
   "name": "@lancedb/lancedb",
-  "version": "0.4.15",
+  "version": "0.4.16",
   "main": "./dist/index.js",
   "types": "./dist/index.d.ts",
   "napi": {
@@ -67,11 +67,11 @@
     "version": "napi version"
   },
   "optionalDependencies": {
-    "@lancedb/lancedb-darwin-arm64": "0.4.15",
-    "@lancedb/lancedb-darwin-x64": "0.4.15",
-    "@lancedb/lancedb-linux-arm64-gnu": "0.4.15",
-    "@lancedb/lancedb-linux-x64-gnu": "0.4.15",
-    "@lancedb/lancedb-win32-x64-msvc": "0.4.15"
+    "@lancedb/lancedb-darwin-arm64": "0.4.16",
+    "@lancedb/lancedb-darwin-x64": "0.4.16",
+    "@lancedb/lancedb-linux-arm64-gnu": "0.4.16",
+    "@lancedb/lancedb-linux-x64-gnu": "0.4.16",
+    "@lancedb/lancedb-win32-x64-msvc": "0.4.16"
   },
   "dependencies": {
     "openai": "^4.29.2",
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 0.6.6
+current_version = 0.6.7
 commit = True
 message = [python] Bump version: {current_version} → {new_version}
 tag = True
@@ -31,3 +31,6 @@ pyo3-build-config = { version = "0.20.3", features = [
     "extension-module",
     "abi3-py38",
 ] }
+
+[features]
+fp16kernels = ["lancedb/fp16kernels"]
@@ -1,9 +1,9 @@
 [project]
 name = "lancedb"
-version = "0.6.6"
+version = "0.6.7"
 dependencies = [
     "deprecation",
-    "pylance==0.10.6",
+    "pylance==0.10.9",
     "ratelimiter~=1.0",
     "retry>=0.9.2",
     "tqdm>=4.27.0",
@@ -41,6 +41,7 @@ classifiers = [
     "Programming Language :: Python :: 3.9",
     "Programming Language :: Python :: 3.10",
     "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
     "Topic :: Scientific/Engineering",
 ]
 
@@ -500,7 +500,10 @@ class RemoteTable(Table):
 
     def count_rows(self, filter: Optional[str] = None) -> int:
         payload = {"predicate": filter}
-        self._conn._client.post(f"/v1/table/{self._name}/count_rows/", data=payload)
+        resp = self._conn._client.post(
+            f"/v1/table/{self._name}/count_rows/", data=payload
+        )
+        return resp
 
     def add_columns(self, transforms: Dict[str, str]):
         raise NotImplementedError(
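This mirrors the Node `countRows` fix above: previously the Python method discarded the POST result and implicitly returned `None`. Both clients now treat the response body itself as the count, which suggests the endpoint answers with a bare number rather than a `{stats: {num_rows}}` wrapper. A hedged sketch of that implied contract, reusing the `post`/`body` calls from the diffs above; `client` and `name` are stand-ins:

```ts
// Sketch only: endpoint shape inferred from the client changes in this compare.
const res = await client.post(
  `/v1/table/${encodeURIComponent(name)}/count_rows/`,
  { predicate: 'id % 2 = 0' }
)
const numRows: number = await res.body()
// previously: (await res.body())?.stats?.num_rows
```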
@@ -1,6 +1,6 @@
 [package]
 name = "lancedb-node"
-version = "0.4.15"
+version = "0.4.16"
 description = "Serverless, low-latency vector database for AI applications"
 license.workspace = true
 edition.workspace = true
@@ -1,6 +1,6 @@
 [package]
 name = "lancedb"
-version = "0.4.15"
+version = "0.4.16"
 edition.workspace = true
 description = "LanceDB: A serverless, low-latency vector database for AI applications"
 license.workspace = true
@@ -50,3 +50,4 @@ walkdir = "2"
 [features]
 default = ["remote"]
 remote = ["dep:reqwest"]
+fp16kernels = ["lance-linalg/fp16kernels"]
@@ -39,29 +39,11 @@ use tokio::{
 struct MirroringObjectStore {
     primary: Arc<dyn ObjectStore>,
     secondary: Arc<dyn ObjectStore>,
-    secondary_copy_behavior: MirroringSecondaryCopy,
-}
-
-impl MirroringObjectStore {
-    async fn secondary_copy(&self, from: &Path, to: &Path) -> Result<()> {
-        let secondary_cp_result = self.secondary.copy(from, to).await;
-        match (&self.secondary_copy_behavior, secondary_cp_result) {
-            (_, Ok(_)) => Ok(()),
-            (
-                MirroringSecondaryCopy::SkipIfNotFound,
-                Err(object_store::Error::NotFound { path: _, source: _ }),
-            ) => Ok(()),
-            (_, Err(e)) => Err(e),
-        }
-    }
 }
 
 impl std::fmt::Display for MirroringObjectStore {
     fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
-        write!(f, "MirroringObjectStore(secondary_copy_behavior=")?;
-        self.secondary_copy_behavior.fmt(f)?;
-        writeln!(f, ")")?;
-
+        writeln!(f, "MirrowingObjectStore")?;
         writeln!(f, "primary:")?;
         self.primary.fmt(f)?;
         writeln!(f, "secondary:")?;
@@ -80,40 +62,12 @@ impl PrimaryOnly for Path {
     }
 }
 
-/// Controls the behavior of copying objects in the secondary store.
-#[derive(Debug, Clone)]
-pub enum MirroringSecondaryCopy {
-    // Default behaviour is to copy
-    Copy,
-    // Since the secondary store may not be as durable as the primary, the copy source
-    // may exist on the primary but not on the secondary. If the source is not found,
-    // this skips making the copy
-    SkipIfNotFound,
-}
-
-impl Default for MirroringSecondaryCopy {
-    fn default() -> Self {
-        Self::Copy
-    }
-}
-
-impl std::fmt::Display for MirroringSecondaryCopy {
-    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
-        match self {
-            Self::Copy => write!(f, "Copy"),
-            Self::SkipIfNotFound => write!(f, "SkipIfNotFound"),
-        }?;
-
-        Ok(())
-    }
-}
-
-/// An object store that mirrors write to secondary object store first
+/// An object store that mirrors write to secondsry object store first
 /// and than commit to primary object store.
 ///
-/// This is meant to mirror writes to a less-durable but lower-latency
+/// This is meant to mirrow writes to a less-durable but lower-latency
 /// store. We have primary store that is durable but slow, and a secondary
-/// store that is fast but not as durable
+/// store that is fast but not asdurable
 ///
 /// Note: this object store does not mirror writes to *.manifest files
 #[async_trait]
@@ -202,7 +156,7 @@ impl ObjectStore for MirroringObjectStore {
         if to.primary_only() {
             self.primary.copy(from, to).await
         } else {
-            self.secondary_copy(from, to).await?;
+            self.secondary.copy(from, to).await?;
             self.primary.copy(from, to).await?;
             Ok(())
         }
@@ -210,7 +164,7 @@ impl ObjectStore for MirroringObjectStore {
 
     async fn copy_if_not_exists(&self, from: &Path, to: &Path) -> Result<()> {
         if !to.primary_only() {
-            self.secondary_copy(from, to).await?;
+            self.secondary.copy(from, to).await?;
         }
         self.primary.copy_if_not_exists(from, to).await
     }
@@ -360,152 +314,33 @@ impl AsyncWrite for MirroringUpload {
 #[derive(Debug)]
 pub struct MirroringObjectStoreWrapper {
     secondary: Arc<dyn ObjectStore>,
-    secondary_copy_behavior: MirroringSecondaryCopy,
-    secondary_wrapper: Option<Arc<dyn WrappingObjectStore>>,
 }
 
 impl MirroringObjectStoreWrapper {
     pub fn new(secondary: Arc<dyn ObjectStore>) -> Self {
-        Self {
-            secondary,
-            secondary_copy_behavior: MirroringSecondaryCopy::default(),
-            secondary_wrapper: None,
-        }
-    }
-
-    pub fn with_secondary_copy_behavior(
-        mut self,
-        secondary_copy_behavior: MirroringSecondaryCopy,
-    ) -> Self {
-        self.secondary_copy_behavior = secondary_copy_behavior;
-        self
-    }
-
-    pub fn with_secondary_wrapper(mut self, wrapper: Arc<dyn WrappingObjectStore>) -> Self {
-        self.secondary_wrapper = Some(wrapper);
-        self
+        Self { secondary }
     }
 }
 
 impl WrappingObjectStore for MirroringObjectStoreWrapper {
     fn wrap(&self, primary: Arc<dyn ObjectStore>) -> Arc<dyn ObjectStore> {
-        let mut secondary = self.secondary.clone();
-        if let Some(wrapper) = &self.secondary_wrapper {
-            secondary = wrapper.wrap(secondary);
-        }
         Arc::new(MirroringObjectStore {
             primary,
-            secondary,
-            secondary_copy_behavior: self.secondary_copy_behavior.clone(),
+            secondary: self.secondary.clone(),
         })
     }
 }
 
-/// An object store that will check if the source of the copy exists before attempting
-/// to copy the object.
-///
-/// The primary use case is to workaround a bug in version 0.9 of object_store where
-/// copying from a non-existent source causes the thread to hang forever
-/// https://github.com/apache/arrow-rs/issues/5503
-#[derive(Debug)]
-struct CheckedCopyObjectStore {
-    inner: Arc<dyn ObjectStore>,
-}
-
-impl std::fmt::Display for CheckedCopyObjectStore {
-    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
-        writeln!(f, "CheckedCopyObjectStore")?;
-        writeln!(f, "inner:")?;
-        self.inner.fmt(f)?;
-        Ok(())
-    }
-}
-
-#[async_trait]
-impl ObjectStore for CheckedCopyObjectStore {
-    async fn put_opts(&self, location: &Path, bytes: Bytes, opts: PutOptions) -> Result<PutResult> {
-        self.inner.put_opts(location, bytes, opts).await
-    }
-
-    async fn put_multipart(
-        &self,
-        location: &Path,
-    ) -> Result<(MultipartId, Box<dyn AsyncWrite + Unpin + Send>)> {
-        self.inner.put_multipart(location).await
-    }
-
-    async fn abort_multipart(&self, location: &Path, multipart_id: &MultipartId) -> Result<()> {
-        self.inner.abort_multipart(location, multipart_id).await
-    }
-
-    async fn get_opts(&self, location: &Path, options: GetOptions) -> Result<GetResult> {
-        self.inner.get_opts(location, options).await
-    }
-
-    async fn delete(&self, location: &Path) -> Result<()> {
-        self.inner.delete(location).await
-    }
-
-    fn list(&self, prefix: Option<&Path>) -> BoxStream<'_, Result<ObjectMeta>> {
-        self.inner.list(prefix)
-    }
-
-    async fn list_with_delimiter(&self, prefix: Option<&Path>) -> Result<ListResult> {
-        self.inner.list_with_delimiter(prefix).await
-    }
-
-    async fn copy(&self, from: &Path, to: &Path) -> Result<()> {
-        // check that the from object exists
-        self.inner.head(from).await?;
-        self.inner.copy(from, to).await
-    }
-
-    async fn copy_if_not_exists(&self, from: &Path, to: &Path) -> Result<()> {
-        // check that the from object exists
-        self.inner.head(from).await?;
-        self.inner.copy_if_not_exists(from, to).await
-    }
-}
-
-#[derive(Debug)]
-pub struct CheckedCopyObjectStoreWrapper {}
-
-impl CheckedCopyObjectStoreWrapper {
-    pub fn new() -> Self {
-        Self {}
-    }
-}
-
-impl Default for CheckedCopyObjectStoreWrapper {
-    fn default() -> Self {
-        Self::new()
-    }
-}
-
-impl WrappingObjectStore for CheckedCopyObjectStoreWrapper {
-    fn wrap(&self, inner: Arc<dyn ObjectStore>) -> Arc<dyn ObjectStore> {
-        Arc::new(CheckedCopyObjectStore { inner })
-    }
-}
-
 // windows pathing can't be simply concatenated
 #[cfg(all(test, not(windows)))]
 mod test {
     use super::*;
 
     use futures::TryStreamExt;
-    use lance::{
-        dataset::WriteParams,
-        io::{ObjectStore, ObjectStoreParams},
-    };
+    use lance::{dataset::WriteParams, io::ObjectStoreParams};
     use lance_testing::datagen::{BatchGenerator, IncrementingInt32, RandomVector};
     use object_store::local::LocalFileSystem;
-    use object_store::ObjectStore as _;
-    use std::fs::File;
-    use std::io::Write;
-    use std::time::Duration;
     use tempfile;
-    use url::Url;
 
     use crate::{
         connect,
@@ -519,8 +354,9 @@ mod test {
         let dir2 = tempfile::tempdir().unwrap().into_path();
 
         let secondary_store = LocalFileSystem::new_with_prefix(dir2.to_str().unwrap()).unwrap();
-        let object_store_wrapper =
-            Arc::new(MirroringObjectStoreWrapper::new(Arc::new(secondary_store)));
+        let object_store_wrapper = Arc::new(MirroringObjectStoreWrapper {
+            secondary: Arc::new(secondary_store),
+        });
 
         let db = connect(dir1.to_str().unwrap()).execute().await.unwrap();
 
@@ -557,9 +393,9 @@ mod test {
             .await
             .unwrap();
 
-        let batches = q.try_collect::<Vec<_>>().await.unwrap();
-        assert_eq!(batches.len(), 1);
-        assert_eq!(batches[0].num_rows(), 10);
+        let bateches = q.try_collect::<Vec<_>>().await.unwrap();
+        assert_eq!(bateches.len(), 1);
+        assert_eq!(bateches[0].num_rows(), 10);
 
         use walkdir::WalkDir;
 
@@ -594,100 +430,4 @@ mod test {
             secondary_elem = secondary_iter.next();
         }
     }
-
-    #[tokio::test]
-    async fn test_secondary_copy_skip_if_not_found() {
-        let dir1 = tempfile::tempdir().unwrap().into_path();
-        let dir2 = tempfile::tempdir().unwrap().into_path();
-
-        // create a file that only exists in partition 1
-        let file_path = format!("{}/hello.txt", dir1.to_str().unwrap());
-        let mut file = File::create(file_path).unwrap();
-        file.write_all(b"hello").unwrap();
-
-        // check we can copy a file that exists on the primary while skipping the secondary
-        let secondary_store =
-            Arc::new(LocalFileSystem::new_with_prefix(dir2.to_str().unwrap()).unwrap());
-        let mirroring_wrapper = MirroringObjectStoreWrapper::new(secondary_store.clone())
-            .with_secondary_copy_behavior(MirroringSecondaryCopy::SkipIfNotFound)
-            .with_secondary_wrapper(Arc::new(CheckedCopyObjectStoreWrapper::new()));
-
-        let primary_store = LocalFileSystem::new_with_prefix(dir1.to_str().unwrap()).unwrap();
-        let store = ObjectStore::new(
-            Arc::new(primary_store) as _,
-            Url::from_directory_path(dir1.clone()).unwrap(),
-            None,
-            Some(Arc::new(mirroring_wrapper)),
-        );
-
-        let result = store
-            .copy(&Path::from("hello.txt"), &Path::from("hello2.txt"))
-            .await;
-        assert!(result.is_ok());
-        assert!(store.exists(&Path::from("hello2.txt")).await.unwrap());
-
-        // check that we will return an error if using MirroedSecondarryCopy::Copy and also that the primary copy does not succeed
-        let mirroring_wrapper = MirroringObjectStoreWrapper::new(secondary_store.clone())
-            .with_secondary_copy_behavior(MirroringSecondaryCopy::Copy)
-            .with_secondary_wrapper(Arc::new(CheckedCopyObjectStoreWrapper::new()));
-        let primary_store = LocalFileSystem::new_with_prefix(dir1.to_str().unwrap()).unwrap();
-        let store = ObjectStore::new(
-            Arc::new(primary_store) as _,
-            Url::from_directory_path(dir1).unwrap(),
-            None,
-            Some(Arc::new(mirroring_wrapper)),
-        );
-
-        let result = store
-            .copy(&Path::from("hello.txt"), &Path::from("hello3.txt"))
-            .await;
-        assert!(result.is_err());
-        assert!(!store.exists(&Path::from("hello3.txt")).await.unwrap());
-
-        // check that if the file exists in the secondary store, we can successfully copy it
-        let file_path = format!("{}/hello.txt", dir2.to_str().unwrap());
-        let mut file = File::create(file_path).unwrap();
-        file.write_all(b"hello").unwrap();
-
-        let result = store
-            .copy(&Path::from("hello.txt"), &Path::from("hello3.txt"))
-            .await;
-        assert!(result.is_ok());
-        assert!(store.exists(&Path::from("hello3.txt")).await.unwrap());
-        assert!(secondary_store
-            .as_ref()
-            .head(&Path::from("hello3.txt"))
-            .await
-            .is_ok());
-    }
-
-    #[tokio::test]
-    async fn test_copy_loop_avoidance() {
-        let dir1 = tempfile::tempdir().unwrap().into_path();
-
-        let object_store_wrapper = CheckedCopyObjectStoreWrapper::new();
-
-        let store_params = ObjectStoreParams {
-            object_store_wrapper: Some(Arc::new(object_store_wrapper)),
-            ..Default::default()
-        };
-        let (store, _) = ObjectStore::from_uri_and_params(dir1.to_str().unwrap(), &store_params)
-            .await
-            .unwrap();
-
-        // wrap in timeout to ensure we don't go into the infinite loop
-        // https://github.com/apache/arrow-rs/issues/5503
-        tokio::time::timeout(Duration::from_secs(10), async move {
-            let result = store
-                .copy(&Path::from("hello1.txt"), &Path::from("hello2.txt"))
-                .await;
-            if result.is_ok() {
-                return Err("copy should have errored".to_string());
-            }
-            Ok(())
-        })
-        .await
-        .unwrap()
-        .unwrap();
-    }
 }
@@ -1304,14 +1304,7 @@ impl TableInternal for NativeTable {
     }
 
     async fn count_rows(&self, filter: Option<String>) -> Result<usize> {
-        let dataset = self.dataset.get().await?;
-        if let Some(filter) = filter {
-            let mut scanner = dataset.scan();
-            scanner.filter(&filter)?;
-            Ok(scanner.count_rows().await? as usize)
-        } else {
-            Ok(dataset.count_rows().await?)
-        }
+        Ok(self.dataset.get().await?.count_rows(filter).await?)
     }
 
     async fn add(