Compare commits

...

8 Commits

Author SHA1 Message Date
Lance Release
1ae08fe31d [python] Bump version: 0.6.2 → 0.6.3 2024-03-11 20:16:36 +00:00
Rob Meng
a517629c65 feat: configurable timeout for LanceDB Cloud queries (#1090) 2024-03-11 16:15:48 -04:00
Ivan Leo
553dae1607 Update default_embedding_functions.md (#1073)
Added a small bit of documentation for the `dim` feature which is
provided by the new `text-embedding-3` model series that allows users to
shorten an embedding.

Happy to discuss a bit on the phrasing but I struggled quite a bit with
getting it to work so wanted to help others who might want to use the
newer model too
2024-03-11 21:30:07 +05:30
Weston Pace
9c7e00eec3 Remove remote integration workflow (#1076) 2024-03-07 12:00:04 -08:00
Will Jones
a7d66032aa fix: Allow converting from NativeTable to Table (#1069) 2024-03-07 08:33:46 -08:00
Lance Release
7fb8a732a5 Updating package-lock.json 2024-03-07 01:05:09 +00:00
Lance Release
f393ac3b0d Updating package-lock.json 2024-03-06 23:26:48 +00:00
Lance Release
ca83354780 Bump version: 0.4.11 → 0.4.12 2024-03-06 23:26:38 +00:00
17 changed files with 66 additions and 147 deletions

View File

@@ -1,5 +1,5 @@
[bumpversion] [bumpversion]
current_version = 0.4.11 current_version = 0.4.12
commit = True commit = True
message = Bump version: {current_version} → {new_version} message = Bump version: {current_version} → {new_version}
tag = True tag = True

View File

@@ -1,37 +0,0 @@
# Workflow that builds the Rust crate and runs its ignored (cloud) integration tests.
name: LanceDb Cloud Integration Test
# Triggered when a run of the "Rust" workflow completes.
on:
workflow_run:
workflows: [Rust]
types:
- completed
# Connection settings for the integration test, read from repository secrets.
env:
LANCEDB_PROJECT: ${{ secrets.LANCEDB_PROJECT }}
LANCEDB_API_KEY: ${{ secrets.LANCEDB_API_KEY }}
LANCEDB_REGION: ${{ secrets.LANCEDB_REGION }}
jobs:
test:
# The whole job is capped at 30 minutes.
timeout-minutes: 30
runs-on: ubuntu-22.04
# Every `run` step uses bash and executes from the `rust` directory.
defaults:
run:
shell: bash
working-directory: rust
steps:
# Check out the repository with full history and LFS objects.
- uses: actions/checkout@v4
with:
fetch-depth: 0
lfs: true
# Cache Rust build artifacts for the `rust` workspace.
- uses: Swatinem/rust-cache@v2
with:
workspaces: rust
# System packages installed before building: protobuf compiler and OpenSSL headers.
- name: Install dependencies
run: |
sudo apt update
sudo apt install -y protobuf-compiler libssl-dev
- name: Build
run: cargo build --all-features
# `-- --ignored` runs only the tests marked `#[ignore]`.
- name: Run Integration test
run: cargo test --tests -- --ignored

View File

@@ -47,6 +47,7 @@ LanceDB registers the OpenAI embeddings function in the registry by default, as
| Parameter | Type | Default Value | Description | | Parameter | Type | Default Value | Description |
|---|---|---|---| |---|---|---|---|
| `name` | `str` | `"text-embedding-ada-002"` | The name of the model. | | `name` | `str` | `"text-embedding-ada-002"` | The name of the model. |
| `dim` | `int` | Model default | For OpenAI's newer `text-embedding-3` models, the dimensionality of the returned embedding. It can be set to a value smaller than the model's default size (for example, 1536) to shorten the embedding. |
```python ```python

44
node/package-lock.json generated
View File

@@ -1,12 +1,12 @@
{ {
"name": "vectordb", "name": "vectordb",
"version": "0.4.11", "version": "0.4.12",
"lockfileVersion": 3, "lockfileVersion": 3,
"requires": true, "requires": true,
"packages": { "packages": {
"": { "": {
"name": "vectordb", "name": "vectordb",
"version": "0.4.11", "version": "0.4.12",
"cpu": [ "cpu": [
"x64", "x64",
"arm64" "arm64"
@@ -52,11 +52,11 @@
"uuid": "^9.0.0" "uuid": "^9.0.0"
}, },
"optionalDependencies": { "optionalDependencies": {
"@lancedb/vectordb-darwin-arm64": "0.4.11", "@lancedb/vectordb-darwin-arm64": "0.4.12",
"@lancedb/vectordb-darwin-x64": "0.4.11", "@lancedb/vectordb-darwin-x64": "0.4.12",
"@lancedb/vectordb-linux-arm64-gnu": "0.4.11", "@lancedb/vectordb-linux-arm64-gnu": "0.4.12",
"@lancedb/vectordb-linux-x64-gnu": "0.4.11", "@lancedb/vectordb-linux-x64-gnu": "0.4.12",
"@lancedb/vectordb-win32-x64-msvc": "0.4.11" "@lancedb/vectordb-win32-x64-msvc": "0.4.12"
}, },
"peerDependencies": { "peerDependencies": {
"@apache-arrow/ts": "^14.0.2", "@apache-arrow/ts": "^14.0.2",
@@ -334,9 +334,9 @@
} }
}, },
"node_modules/@lancedb/vectordb-darwin-arm64": { "node_modules/@lancedb/vectordb-darwin-arm64": {
"version": "0.4.11", "version": "0.4.12",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.4.11.tgz", "resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.4.12.tgz",
"integrity": "sha512-JDOKmFnuJPFkA7ZmrzBJolROwSjWr7yMvAbi40uLBc25YbbVezodd30u2EFtIwWwtk1GqNYRZ49FZOElKYeC/Q==", "integrity": "sha512-38/rkJRlWXkPWXuj9onzvbrhnIWcIUQjgEp5G9v5ixPosBowm7A4j8e2Q8CJMsVSNcVX2JLqwWVldiWegZFuYw==",
"cpu": [ "cpu": [
"arm64" "arm64"
], ],
@@ -346,9 +346,9 @@
] ]
}, },
"node_modules/@lancedb/vectordb-darwin-x64": { "node_modules/@lancedb/vectordb-darwin-x64": {
"version": "0.4.11", "version": "0.4.12",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.4.11.tgz", "resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.4.12.tgz",
"integrity": "sha512-iy6r+8tp2v1EFgJV52jusXtxgO6NY6SkpOdX41xPqN2mQWMkfUAR9Xtks1mgknjPOIKH4MRc8ZS0jcW/UWmilQ==", "integrity": "sha512-psE48dztyO450hXWdv9Rl9aayM2HQ1uF9wErfC0gKmDUh1N0NdVq2viDuFpZxnmCis/nvGwKlYiYT9OnYNCJ9g==",
"cpu": [ "cpu": [
"x64" "x64"
], ],
@@ -358,9 +358,9 @@
] ]
}, },
"node_modules/@lancedb/vectordb-linux-arm64-gnu": { "node_modules/@lancedb/vectordb-linux-arm64-gnu": {
"version": "0.4.11", "version": "0.4.12",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.4.11.tgz", "resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.4.12.tgz",
"integrity": "sha512-5K6IVcTMuH0SZBjlqB5Gg39WC889FpTwIWKufxzQMMXrzxo5J3lKUHVoR28RRlNhDF2d9kZXBEyCpIfDFsV9iQ==", "integrity": "sha512-xwkgF6MiF5aAdG9JG8v4ke652YxUJrhs9z4OrsEfrENnvsIQd2C5UyKMepVLdvij4BI/XPFRFWXdjPvP7S9rTA==",
"cpu": [ "cpu": [
"arm64" "arm64"
], ],
@@ -370,9 +370,9 @@
] ]
}, },
"node_modules/@lancedb/vectordb-linux-x64-gnu": { "node_modules/@lancedb/vectordb-linux-x64-gnu": {
"version": "0.4.11", "version": "0.4.12",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.4.11.tgz", "resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.4.12.tgz",
"integrity": "sha512-hF9ZChsdqKqqnivOzd9mE7lC3PmhZadXtwThi2RrsPiOLoEaGDfmr6Ni3amVQnB3bR8YEJtTxdQxe0NC4uW/8g==", "integrity": "sha512-gJqYR0aymrS+C60xc4EQPzmQ5/69XfeFv2ofBvAj7qW+c6BcnoAcfVl+7s1IrcWeGz251sm5cD5Lx4AzJd89dA==",
"cpu": [ "cpu": [
"x64" "x64"
], ],
@@ -382,9 +382,9 @@
] ]
}, },
"node_modules/@lancedb/vectordb-win32-x64-msvc": { "node_modules/@lancedb/vectordb-win32-x64-msvc": {
"version": "0.4.11", "version": "0.4.12",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.4.11.tgz", "resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.4.12.tgz",
"integrity": "sha512-0+9ut1ccKoqIyGxsVixwx3771Z+DXpl5WfSmOeA8kf3v3jlOg2H+0YUahiXLDid2ju+yeLPrAUYm7A1gKHVhew==", "integrity": "sha512-LhCzpyEeBUyO6L2fuVqeP3mW8kYDryyU9PNqcM01m88sZB1Do6AlwiM+GjPRQ0SpzD0LK9oxQqSmJrdcNGqjbw==",
"cpu": [ "cpu": [
"x64" "x64"
], ],

View File

@@ -1,6 +1,6 @@
{ {
"name": "vectordb", "name": "vectordb",
"version": "0.4.11", "version": "0.4.12",
"description": " Serverless, low-latency vector database for AI applications", "description": " Serverless, low-latency vector database for AI applications",
"main": "dist/index.js", "main": "dist/index.js",
"types": "dist/index.d.ts", "types": "dist/index.d.ts",
@@ -88,10 +88,10 @@
} }
}, },
"optionalDependencies": { "optionalDependencies": {
"@lancedb/vectordb-darwin-arm64": "0.4.11", "@lancedb/vectordb-darwin-arm64": "0.4.12",
"@lancedb/vectordb-darwin-x64": "0.4.11", "@lancedb/vectordb-darwin-x64": "0.4.12",
"@lancedb/vectordb-linux-arm64-gnu": "0.4.11", "@lancedb/vectordb-linux-arm64-gnu": "0.4.12",
"@lancedb/vectordb-linux-x64-gnu": "0.4.11", "@lancedb/vectordb-linux-x64-gnu": "0.4.12",
"@lancedb/vectordb-win32-x64-msvc": "0.4.11" "@lancedb/vectordb-win32-x64-msvc": "0.4.12"
} }
} }

View File

@@ -1,5 +1,5 @@
[bumpversion] [bumpversion]
current_version = 0.6.2 current_version = 0.6.3
commit = True commit = True
message = [python] Bump version: {current_version} → {new_version} message = [python] Bump version: {current_version} → {new_version}
tag = True tag = True

View File

@@ -1,6 +1,6 @@
[project] [project]
name = "lancedb" name = "lancedb"
version = "0.6.2" version = "0.6.3"
dependencies = [ dependencies = [
"deprecation", "deprecation",
"pylance==0.10.2", "pylance==0.10.2",

View File

@@ -35,6 +35,7 @@ def connect(
host_override: Optional[str] = None, host_override: Optional[str] = None,
read_consistency_interval: Optional[timedelta] = None, read_consistency_interval: Optional[timedelta] = None,
request_thread_pool: Optional[Union[int, ThreadPoolExecutor]] = None, request_thread_pool: Optional[Union[int, ThreadPoolExecutor]] = None,
**kwargs,
) -> DBConnection: ) -> DBConnection:
"""Connect to a LanceDB database. """Connect to a LanceDB database.
@@ -99,7 +100,12 @@ def connect(
if isinstance(request_thread_pool, int): if isinstance(request_thread_pool, int):
request_thread_pool = ThreadPoolExecutor(request_thread_pool) request_thread_pool = ThreadPoolExecutor(request_thread_pool)
return RemoteDBConnection( return RemoteDBConnection(
uri, api_key, region, host_override, request_thread_pool=request_thread_pool uri,
api_key,
region,
host_override,
request_thread_pool=request_thread_pool,
**kwargs,
) )
return LanceDBConnection(uri, read_consistency_interval=read_consistency_interval) return LanceDBConnection(uri, read_consistency_interval=read_consistency_interval)

View File

@@ -58,6 +58,9 @@ class RestfulLanceDBClient:
closed: bool = attrs.field(default=False, init=False) closed: bool = attrs.field(default=False, init=False)
connection_timeout: float = attrs.field(default=120.0, kw_only=True)
read_timeout: float = attrs.field(default=300.0, kw_only=True)
@functools.cached_property @functools.cached_property
def session(self) -> requests.Session: def session(self) -> requests.Session:
sess = requests.Session() sess = requests.Session()
@@ -117,7 +120,7 @@ class RestfulLanceDBClient:
urljoin(self.url, uri), urljoin(self.url, uri),
params=params, params=params,
headers=self.headers, headers=self.headers,
timeout=(120.0, 300.0), timeout=(self.connection_timeout, self.read_timeout),
) as resp: ) as resp:
self._check_status(resp) self._check_status(resp)
return resp.json() return resp.json()
@@ -159,7 +162,7 @@ class RestfulLanceDBClient:
urljoin(self.url, uri), urljoin(self.url, uri),
headers=headers, headers=headers,
params=params, params=params,
timeout=(120.0, 300.0), timeout=(self.connection_timeout, self.read_timeout),
**req_kwargs, **req_kwargs,
) as resp: ) as resp:
self._check_status(resp) self._check_status(resp)

View File

@@ -41,6 +41,8 @@ class RemoteDBConnection(DBConnection):
region: str, region: str,
host_override: Optional[str] = None, host_override: Optional[str] = None,
request_thread_pool: Optional[ThreadPoolExecutor] = None, request_thread_pool: Optional[ThreadPoolExecutor] = None,
connection_timeout: float = 120.0,
read_timeout: float = 300.0,
): ):
"""Connect to a remote LanceDB database.""" """Connect to a remote LanceDB database."""
parsed = urlparse(db_url) parsed = urlparse(db_url)
@@ -49,7 +51,12 @@ class RemoteDBConnection(DBConnection):
self.db_name = parsed.netloc self.db_name = parsed.netloc
self.api_key = api_key self.api_key = api_key
self._client = RestfulLanceDBClient( self._client = RestfulLanceDBClient(
self.db_name, region, api_key, host_override self.db_name,
region,
api_key,
host_override,
connection_timeout=connection_timeout,
read_timeout=read_timeout,
) )
self._request_thread_pool = request_thread_pool self._request_thread_pool = request_thread_pool

View File

@@ -1,6 +1,6 @@
[package] [package]
name = "lancedb-node" name = "lancedb-node"
version = "0.4.11" version = "0.4.12"
description = "Serverless, low-latency vector database for AI applications" description = "Serverless, low-latency vector database for AI applications"
license.workspace = true license.workspace = true
edition.workspace = true edition.workspace = true

View File

@@ -323,7 +323,7 @@ impl JsTable {
.and_then(|val| val.downcast::<JsNumber, _>(&mut cx).ok()) .and_then(|val| val.downcast::<JsNumber, _>(&mut cx).ok())
.map(|val| val.value(&mut cx) as i64) .map(|val| val.value(&mut cx) as i64)
.unwrap_or_else(|| 2 * 7 * 24 * 60); // 2 weeks .unwrap_or_else(|| 2 * 7 * 24 * 60); // 2 weeks
let older_than = chrono::Duration::minutes(older_than); let older_than = chrono::Duration::try_minutes(older_than).unwrap();
let delete_unverified: Option<bool> = Some( let delete_unverified: Option<bool> = Some(
cx.argument_opt(1) cx.argument_opt(1)
.and_then(|val| val.downcast::<JsBoolean, _>(&mut cx).ok()) .and_then(|val| val.downcast::<JsBoolean, _>(&mut cx).ok())

View File

@@ -1,6 +1,6 @@
[package] [package]
name = "lancedb" name = "lancedb"
version = "0.4.11" version = "0.4.12"
edition.workspace = true edition.workspace = true
description = "LanceDB: A serverless, low-latency vector database for AI applications" description = "LanceDB: A serverless, low-latency vector database for AI applications"
license.workspace = true license.workspace = true

View File

@@ -796,10 +796,10 @@ mod tests {
let tmp_dir = tempdir().unwrap(); let tmp_dir = tempdir().unwrap();
let mut names = Vec::with_capacity(100); let mut names = Vec::with_capacity(100);
for _ in 0..100 { for _ in 0..100 {
let name = uuid::Uuid::new_v4().to_string(); let mut name = uuid::Uuid::new_v4().to_string();
names.push(name.clone()); names.push(name.clone());
let table_name = name + ".lance"; name.push_str(".lance");
create_dir_all(tmp_dir.path().join(&table_name)).unwrap(); create_dir_all(tmp_dir.path().join(&name)).unwrap();
} }
names.sort(); names.sort();

View File

@@ -65,7 +65,6 @@ mod tests {
use super::*; use super::*;
use arrow_array::{Float32Array, Int64Array, RecordBatch}; use arrow_array::{Float32Array, Int64Array, RecordBatch};
use arrow_ipc::writer::StreamWriter;
use arrow_schema::{DataType, Field, Schema}; use arrow_schema::{DataType, Field, Schema};
use std::sync::Arc; use std::sync::Arc;

View File

@@ -504,6 +504,13 @@ impl Table {
} }
} }
/// Converts an owned [`NativeTable`] into a [`Table`] by storing it behind an
/// `Arc` in the `inner` field.
impl From<NativeTable> for Table {
    fn from(native: NativeTable) -> Self {
        Self { inner: Arc::new(native) }
    }
}
/// A table in a LanceDB database. /// A table in a LanceDB database.
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub struct NativeTable { pub struct NativeTable {
@@ -1141,7 +1148,7 @@ impl TableInternal for NativeTable {
.compaction; .compaction;
stats.prune = self stats.prune = self
.optimize(OptimizeAction::Prune { .optimize(OptimizeAction::Prune {
older_than: Duration::days(7), older_than: Duration::try_days(7).unwrap(),
delete_unverified: None, delete_unverified: None,
}) })
.await? .await?

View File

@@ -1,67 +0,0 @@
// Copyright 2024 LanceDB Developers.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::sync::Arc;
use arrow_array::RecordBatchIterator;
#[tokio::test]
#[ignore]
async fn cloud_integration_test() {
let project = std::env::var("LANCEDB_PROJECT")
.expect("the LANCEDB_PROJECT env must be set to run the cloud integration test");
let api_key = std::env::var("LANCEDB_API_KEY")
.expect("the LANCEDB_API_KEY env must be set to run the cloud integration test");
let region = std::env::var("LANCEDB_REGION")
.expect("the LANCEDB_REGION env must be set to run the cloud integration test");
let host_override = std::env::var("LANCEDB_HOST_OVERRIDE")
.map(Some)
.unwrap_or(None);
if host_override.is_none() {
println!("No LANCEDB_HOST_OVERRIDE has been set. Running integration test against LanceDb Cloud production instance");
}
let mut builder = lancedb::connect(&format!("db://{}", project))
.api_key(&api_key)
.region(&region);
if let Some(host_override) = &host_override {
builder = builder.host_override(host_override);
}
let db = builder.execute().await.unwrap();
let schema = Arc::new(arrow_schema::Schema::new(vec![
arrow_schema::Field::new("id", arrow_schema::DataType::Int64, false),
arrow_schema::Field::new("name", arrow_schema::DataType::Utf8, false),
]));
let initial_data = arrow::record_batch::RecordBatch::try_new(
schema.clone(),
vec![
Arc::new(arrow_array::Int64Array::from(vec![1, 2, 3])),
Arc::new(arrow_array::StringArray::from(vec!["a", "b", "c"])),
],
);
let rbr = RecordBatchIterator::new(vec![initial_data], schema);
let name = uuid::Uuid::new_v4().to_string();
let tbl = db
.create_table(name.clone(), Box::new(rbr))
.execute()
.await
.unwrap();
assert_eq!(tbl.name(), name);
let table_names = db.table_names().execute().await.unwrap();
assert!(table_names.contains(&name));
}