mirror of
https://github.com/lancedb/lancedb.git
synced 2025-12-24 05:49:57 +00:00
Compare commits
4 Commits
python-v0.
...
v0.1.19
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
6d70e7c29b | ||
|
|
73cc12ecc5 | ||
|
|
6036cf48a7 | ||
|
|
15f4787cc8 |
@@ -1,5 +1,5 @@
|
||||
[bumpversion]
|
||||
current_version = 0.1.18
|
||||
current_version = 0.1.19
|
||||
commit = True
|
||||
message = Bump version: {current_version} → {new_version}
|
||||
tag = True
|
||||
|
||||
@@ -13,4 +13,5 @@ arrow-schema = "42.0"
|
||||
arrow-ipc = "42.0"
|
||||
half = { "version" = "=2.2.1", default-features = false }
|
||||
object_store = "0.6.1"
|
||||
snafu = "0.7.4"
|
||||
|
||||
|
||||
@@ -57,12 +57,14 @@ nav:
|
||||
- Basics: basic.md
|
||||
- Embeddings: embedding.md
|
||||
- Python full-text search: fts.md
|
||||
- Python integrations:
|
||||
- Integrations:
|
||||
- Pandas and PyArrow: python/arrow.md
|
||||
- DuckDB: python/duckdb.md
|
||||
- LangChain 🦜️🔗: https://python.langchain.com/en/latest/modules/indexes/vectorstores/examples/lancedb.html
|
||||
- LangChain JS/TS 🦜️🔗: https://js.langchain.com/docs/modules/data_connection/vectorstores/integrations/lancedb
|
||||
- LlamaIndex 🦙: https://gpt-index.readthedocs.io/en/latest/examples/vector_stores/LanceDBIndexDemo.html
|
||||
- Pydantic: python/pydantic.md
|
||||
- Voxel51: integrations/voxel51.md
|
||||
- Python examples:
|
||||
- YouTube Transcript Search: notebooks/youtube_transcript_search.ipynb
|
||||
- Documentation QA Bot using LangChain: notebooks/code_qa_bot.ipynb
|
||||
@@ -72,6 +74,7 @@ nav:
|
||||
- Javascript examples:
|
||||
- YouTube Transcript Search: examples/youtube_transcript_bot_with_nodejs.md
|
||||
- TransformersJS Embedding Search: examples/transformerjs_embedding_search_nodejs.md
|
||||
|
||||
- References:
|
||||
- Vector Search: search.md
|
||||
- SQL filters: sql.md
|
||||
|
||||
BIN
docs/src/assets/voxel.gif
Normal file
BIN
docs/src/assets/voxel.gif
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 953 KiB |
@@ -4,4 +4,10 @@
|
||||
|
||||
<img id="splash" width="400" alt="youtube transcript search" src="https://user-images.githubusercontent.com/917119/236965568-def7394d-171c-45f2-939d-8edfeaadd88c.png">
|
||||
|
||||
|
||||
<a href="https://colab.research.google.com/github/lancedb/vectordb-recipes/blob/main/examples/youtube_bot/main.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"></a>
|
||||
|
||||
Scripts - [](./examples/youtube_bot/main.py) [](./examples/youtube_bot/index.js)
|
||||
|
||||
|
||||
This example is in a [notebook](https://github.com/lancedb/lancedb/blob/main/docs/src/notebooks/youtube_transcript_search.ipynb)
|
||||
|
||||
71
docs/src/integrations/voxel51.md
Normal file
71
docs/src/integrations/voxel51.md
Normal file
@@ -0,0 +1,71 @@
|
||||

|
||||
|
||||
Basic recipe
|
||||
____________
|
||||
|
||||
The basic workflow to use LanceDB to create a similarity index on your FiftyOne
|
||||
datasets and use this to query your data is as follows:
|
||||
|
||||
1) Load a dataset into FiftyOne
|
||||
|
||||
2) Compute embedding vectors for samples or patches in your dataset, or select
|
||||
a model to use to generate embeddings
|
||||
|
||||
3) Use the `compute_similarity()`
|
||||
method to generate a LanceDB table for the samples or object
|
||||
patches embeddings in a dataset by setting the parameter `backend="lancedb"` and
|
||||
specifying a `brain_key` of your choice
|
||||
|
||||
4) Use this LanceDB table to query your data with
|
||||
`sort_by_similarity()`
|
||||
|
||||
5) If desired, delete the table
|
||||
|
||||
The example below demonstrates this workflow.
|
||||
|
||||
!!! Note
|
||||
|
||||
You must install the LanceDB Python client to run this
|
||||
```
|
||||
pip install lancedb
|
||||
```
|
||||
|
||||
```python
|
||||
|
||||
import fiftyone as fo
|
||||
import fiftyone.brain as fob
|
||||
import fiftyone.zoo as foz
|
||||
|
||||
# Step 1: Load your data into FiftyOne
|
||||
dataset = foz.load_zoo_dataset("quickstart")
|
||||
|
||||
# Steps 2 and 3: Compute embeddings and create a similarity index
|
||||
lancedb_index = fob.compute_similarity(
|
||||
dataset,
|
||||
model="clip-vit-base32-torch",
|
||||
brain_key="lancedb_index",
|
||||
backend="lancedb",
|
||||
)
|
||||
```
|
||||
Once the similarity index has been generated, we can query our data in FiftyOne
|
||||
by specifying the `brain_key`:
|
||||
|
||||
```python
|
||||
# Step 4: Query your data
|
||||
query = dataset.first().id # query by sample ID
|
||||
view = dataset.sort_by_similarity(
|
||||
query,
|
||||
brain_key="lancedb_index",
|
||||
k=10, # limit to 10 most similar samples
|
||||
)
|
||||
|
||||
# Step 5 (optional): Cleanup
|
||||
|
||||
# Delete the LanceDB table
|
||||
lancedb_index.cleanup()
|
||||
|
||||
# Delete run record from FiftyOne
|
||||
dataset.delete_brain_run("lancedb_index")
|
||||
```
|
||||
|
||||
For a more in-depth walkthrough of the integration, visit the LanceDB guide on Voxel51 - [LanceDB x Voxel51](https://docs.voxel51.com/integrations/lancedb.html)
|
||||
@@ -10,7 +10,11 @@
|
||||
"\n",
|
||||
"This Q&A bot will allow you to query your own documentation easily using questions. We'll also demonstrate the use of LangChain and LanceDB using the OpenAI API. \n",
|
||||
"\n",
|
||||
"In this example we'll use Pandas 2.0 documentation, but, this could be replaced for your own docs as well"
|
||||
"In this example we'll use Pandas 2.0 documentation, but, this could be replaced for your own docs as well\n",
|
||||
"\n",
|
||||
"<a href=\"https://colab.research.google.com/github/lancedb/vectordb-recipes/blob/main/examples/Code-Documentation-QA-Bot/main.ipynb\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"></a>\n",
|
||||
"\n",
|
||||
"Scripts - [](./examples/Code-Documentation-QA-Bot/main.py) [](./examples/Code-Documentation-QA-Bot/index.js)"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -1,5 +1,14 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"\n",
|
||||
"\n",
|
||||
" <a href=\"https://colab.research.google.com/github/lancedb/vectordb-recipes/blob/main/examples/multimodal_clip/main.ipynb\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"></a>| [](./examples/multimodal_clip/main.py) |"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
@@ -42,6 +51,19 @@
|
||||
"## First run setup: Download data and pre-process"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"### Get dataset\n",
|
||||
"\n",
|
||||
"!wget https://eto-public.s3.us-west-2.amazonaws.com/datasets/diffusiondb_lance.tar.gz\n",
|
||||
"!tar -xvf diffusiondb_lance.tar.gz\n",
|
||||
"!mv diffusiondb_test rawdata.lance\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 30,
|
||||
@@ -247,7 +269,7 @@
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"display_name": "Python 3.11.4 64-bit",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
@@ -261,7 +283,12 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.3"
|
||||
"version": "3.11.4"
|
||||
},
|
||||
"vscode": {
|
||||
"interpreter": {
|
||||
"hash": "b0fa6594d8f4cbf19f97940f81e996739fb7646882a419484c72d19e05852a7e"
|
||||
}
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -8,7 +8,12 @@
|
||||
"source": [
|
||||
"# Youtube Transcript Search QA Bot\n",
|
||||
"\n",
|
||||
"This Q&A bot will allow you to search through youtube transcripts using natural language! By going through this notebook, we'll introduce how you can use LanceDB to store and manage your data easily."
|
||||
"This Q&A bot will allow you to search through youtube transcripts using natural language! By going through this notebook, we'll introduce how you can use LanceDB to store and manage your data easily.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"<a href=\"https://colab.research.google.com/github/lancedb/vectordb-recipes/blob/main/examples/youtube_bot/main.ipynb\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"></a>\n",
|
||||
"\n",
|
||||
"Scripts - [](./examples/youtube_bot/main.py) [](./examples/youtube_bot/index.js)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -7,7 +7,8 @@ excluded_files = [
|
||||
"../src/embedding.md",
|
||||
"../src/examples/serverless_lancedb_with_s3_and_lambda.md",
|
||||
"../src/examples/serverless_qa_bot_with_modal_and_langchain.md",
|
||||
"../src/examples/youtube_transcript_bot_with_nodejs.md"
|
||||
"../src/examples/youtube_transcript_bot_with_nodejs.md",
|
||||
"../src/integrations/voxel51.md",
|
||||
]
|
||||
|
||||
python_prefix = "py"
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "vectordb",
|
||||
"version": "0.1.18",
|
||||
"version": "0.1.19",
|
||||
"description": " Serverless, low-latency vector database for AI applications",
|
||||
"main": "dist/index.js",
|
||||
"types": "dist/index.d.ts",
|
||||
@@ -78,10 +78,10 @@
|
||||
}
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"@lancedb/vectordb-darwin-arm64": "0.1.18",
|
||||
"@lancedb/vectordb-darwin-x64": "0.1.18",
|
||||
"@lancedb/vectordb-linux-arm64-gnu": "0.1.18",
|
||||
"@lancedb/vectordb-linux-x64-gnu": "0.1.18",
|
||||
"@lancedb/vectordb-win32-x64-msvc": "0.1.18"
|
||||
"@lancedb/vectordb-darwin-arm64": "0.1.19",
|
||||
"@lancedb/vectordb-darwin-x64": "0.1.19",
|
||||
"@lancedb/vectordb-linux-arm64-gnu": "0.1.19",
|
||||
"@lancedb/vectordb-linux-x64-gnu": "0.1.19",
|
||||
"@lancedb/vectordb-win32-x64-msvc": "0.1.19"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -28,7 +28,6 @@ export interface EmbeddingFunction<T> {
|
||||
}
|
||||
|
||||
export function isEmbeddingFunction<T> (value: any): value is EmbeddingFunction<T> {
|
||||
return Object.keys(value).length === 2 &&
|
||||
typeof value.sourceColumn === 'string' &&
|
||||
return typeof value.sourceColumn === 'string' &&
|
||||
typeof value.embed === 'function'
|
||||
}
|
||||
|
||||
@@ -16,6 +16,7 @@ import { describe } from 'mocha'
|
||||
import { assert } from 'chai'
|
||||
|
||||
import { OpenAIEmbeddingFunction } from '../../embedding/openai'
|
||||
import { isEmbeddingFunction } from '../../embedding/embedding_function'
|
||||
|
||||
// eslint-disable-next-line @typescript-eslint/no-var-requires
|
||||
const { OpenAIApi } = require('openai')
|
||||
@@ -47,4 +48,10 @@ describe('OpenAPIEmbeddings', function () {
|
||||
assert.deepEqual(vectors[1], stubValue.data.data[1].embedding)
|
||||
})
|
||||
})
|
||||
|
||||
describe('isEmbeddingFunction', function () {
|
||||
it('should match the isEmbeddingFunction guard', function () {
|
||||
assert.isTrue(isEmbeddingFunction(new OpenAIEmbeddingFunction('text', 'sk-key')))
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
@@ -134,6 +134,18 @@ describe('LanceDB client', function () {
|
||||
assert.equal(await table.countRows(), 2)
|
||||
})
|
||||
|
||||
it('fails to create a new table when the vector column is missing', async function () {
|
||||
const dir = await track().mkdir('lancejs')
|
||||
const con = await lancedb.connect(dir)
|
||||
|
||||
const data = [
|
||||
{ id: 1, price: 10 }
|
||||
]
|
||||
|
||||
const create = con.createTable('missing_vector', data)
|
||||
await expect(create).to.be.rejectedWith(Error, 'column \'vector\' is missing')
|
||||
})
|
||||
|
||||
it('use overwrite flag to overwrite existing table', async function () {
|
||||
const dir = await track().mkdir('lancejs')
|
||||
const con = await lancedb.connect(dir)
|
||||
@@ -230,6 +242,14 @@ describe('LanceDB client', function () {
|
||||
// Default replace = true
|
||||
await table.createIndex({ type: 'ivf_pq', column: 'vector', num_partitions: 2, max_iters: 2, num_sub_vectors: 2 })
|
||||
}).timeout(50_000)
|
||||
|
||||
it('it should fail when the column is not a vector', async function () {
|
||||
const uri = await createTestDB(32, 300)
|
||||
const con = await lancedb.connect(uri)
|
||||
const table = await con.openTable('vectors')
|
||||
const createIndex = table.createIndex({ type: 'ivf_pq', column: 'name', num_partitions: 2, max_iters: 2, num_sub_vectors: 2 })
|
||||
await expect(createIndex).to.be.rejectedWith(/VectorIndex requires the column data type to be fixed size list of float32s/)
|
||||
})
|
||||
})
|
||||
|
||||
describe('when using a custom embedding function', function () {
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "vectordb-node"
|
||||
version = "0.1.18"
|
||||
version = "0.1.19"
|
||||
description = "Serverless, low-latency vector database for AI applications"
|
||||
license = "Apache-2.0"
|
||||
edition = "2018"
|
||||
@@ -21,5 +21,6 @@ vectordb = { path = "../../vectordb" }
|
||||
tokio = { version = "1.23", features = ["rt-multi-thread"] }
|
||||
neon = {version = "0.10.1", default-features = false, features = ["channel-api", "napi-6", "promise-api", "task-api"] }
|
||||
object_store = { workspace = true, features = ["aws"] }
|
||||
snafu = { workspace = true }
|
||||
async-trait = "0"
|
||||
env_logger = "0"
|
||||
|
||||
@@ -13,27 +13,30 @@
|
||||
// limitations under the License.
|
||||
|
||||
use std::io::Cursor;
|
||||
use std::ops::Deref;
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow_array::cast::as_list_array;
|
||||
use arrow_array::{Array, FixedSizeListArray, RecordBatch};
|
||||
use arrow_array::{Array, ArrayRef, FixedSizeListArray, RecordBatch};
|
||||
use arrow_ipc::reader::FileReader;
|
||||
use arrow_ipc::writer::FileWriter;
|
||||
use arrow_schema::{DataType, Field, Schema};
|
||||
use lance::arrow::{FixedSizeListArrayExt, RecordBatchExt};
|
||||
use vectordb::table::VECTOR_COLUMN_NAME;
|
||||
|
||||
use crate::error::{MissingColumnSnafu, Result};
|
||||
use snafu::prelude::*;
|
||||
|
||||
pub(crate) fn convert_record_batch(record_batch: RecordBatch) -> Result<RecordBatch> {
|
||||
let column = get_column(VECTOR_COLUMN_NAME, &record_batch)?;
|
||||
|
||||
pub(crate) fn convert_record_batch(record_batch: RecordBatch) -> RecordBatch {
|
||||
let column = record_batch
|
||||
.column_by_name("vector")
|
||||
.cloned()
|
||||
.expect("vector column is missing");
|
||||
// TODO: in the future we should just consume the underlying JS buffer directly instead of copying this Arrow data around a bunch of times
|
||||
let arr = as_list_array(column.as_ref());
|
||||
let list_size = arr.values().len() / record_batch.num_rows();
|
||||
let r =
|
||||
FixedSizeListArray::try_new_from_values(arr.values().to_owned(), list_size as i32).unwrap();
|
||||
let r = FixedSizeListArray::try_new_from_values(arr.values().to_owned(), list_size as i32)?;
|
||||
|
||||
let schema = Arc::new(Schema::new(vec![Field::new(
|
||||
"vector",
|
||||
VECTOR_COLUMN_NAME,
|
||||
DataType::FixedSizeList(
|
||||
Arc::new(Field::new("item", DataType::Float32, true)),
|
||||
list_size as i32,
|
||||
@@ -41,22 +44,42 @@ pub(crate) fn convert_record_batch(record_batch: RecordBatch) -> RecordBatch {
|
||||
true,
|
||||
)]));
|
||||
|
||||
let mut new_batch = RecordBatch::try_new(schema.clone(), vec![Arc::new(r)]).unwrap();
|
||||
let mut new_batch = RecordBatch::try_new(schema.clone(), vec![Arc::new(r)])?;
|
||||
|
||||
if record_batch.num_columns() > 1 {
|
||||
let rb = record_batch.drop_column("vector").unwrap();
|
||||
new_batch = new_batch.merge(&rb).unwrap();
|
||||
let rb = record_batch.drop_column(VECTOR_COLUMN_NAME)?;
|
||||
new_batch = new_batch.merge(&rb)?;
|
||||
}
|
||||
new_batch
|
||||
Ok(new_batch)
|
||||
}
|
||||
|
||||
pub(crate) fn arrow_buffer_to_record_batch(slice: &[u8]) -> Vec<RecordBatch> {
|
||||
fn get_column(column_name: &str, record_batch: &RecordBatch) -> Result<ArrayRef> {
|
||||
record_batch
|
||||
.column_by_name(column_name)
|
||||
.cloned()
|
||||
.context(MissingColumnSnafu { name: column_name })
|
||||
}
|
||||
|
||||
pub(crate) fn arrow_buffer_to_record_batch(slice: &[u8]) -> Result<Vec<RecordBatch>> {
|
||||
let mut batches: Vec<RecordBatch> = Vec::new();
|
||||
let fr = FileReader::try_new(Cursor::new(slice), None);
|
||||
let file_reader = fr.unwrap();
|
||||
let file_reader = FileReader::try_new(Cursor::new(slice), None)?;
|
||||
for b in file_reader {
|
||||
let record_batch = convert_record_batch(b.unwrap());
|
||||
let record_batch = convert_record_batch(b?)?;
|
||||
batches.push(record_batch);
|
||||
}
|
||||
batches
|
||||
Ok(batches)
|
||||
}
|
||||
|
||||
pub(crate) fn record_batch_to_buffer(batches: Vec<RecordBatch>) -> Result<Vec<u8>> {
|
||||
if batches.is_empty() {
|
||||
return Ok(Vec::new());
|
||||
}
|
||||
|
||||
let schema = batches.get(0).unwrap().schema();
|
||||
let mut fr = FileWriter::try_new(Vec::new(), schema.deref())?;
|
||||
for batch in batches.iter() {
|
||||
fr.write(batch)?
|
||||
}
|
||||
fr.finish()?;
|
||||
Ok(fr.into_inner()?)
|
||||
}
|
||||
|
||||
73
rust/ffi/node/src/error.rs
Normal file
73
rust/ffi/node/src/error.rs
Normal file
@@ -0,0 +1,73 @@
|
||||
// Copyright 2023 Lance Developers.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use arrow_schema::ArrowError;
|
||||
use neon::context::Context;
|
||||
use neon::prelude::NeonResult;
|
||||
use snafu::Snafu;
|
||||
|
||||
#[derive(Debug, Snafu)]
|
||||
#[snafu(visibility(pub(crate)))]
|
||||
pub enum Error {
|
||||
#[snafu(display("column '{name}' is missing"))]
|
||||
MissingColumn { name: String },
|
||||
#[snafu(display("{message}"))]
|
||||
LanceDB { message: String },
|
||||
}
|
||||
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
|
||||
impl From<vectordb::error::Error> for Error {
|
||||
fn from(e: vectordb::error::Error) -> Self {
|
||||
Self::LanceDB {
|
||||
message: e.to_string(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<lance::Error> for Error {
|
||||
fn from(e: lance::Error) -> Self {
|
||||
Self::LanceDB {
|
||||
message: e.to_string(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<ArrowError> for Error {
|
||||
fn from(value: ArrowError) -> Self {
|
||||
Self::LanceDB {
|
||||
message: value.to_string(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// ResultExt is used to transform a [`Result`] into a [`NeonResult`],
|
||||
/// so it can be returned as a JavaScript error
|
||||
/// Copied from [Neon](https://github.com/neon-bindings/neon/blob/4c2e455a9e6814f1ba0178616d63caec7f4df317/crates/neon/src/result/mod.rs#L88)
|
||||
pub trait ResultExt<T> {
|
||||
fn or_throw<'a, C: Context<'a>>(self, cx: &mut C) -> NeonResult<T>;
|
||||
}
|
||||
|
||||
/// Implement ResultExt for the std Result so it can be used with any Result type
|
||||
impl<T, E> ResultExt<T> for std::result::Result<T, E>
|
||||
where
|
||||
E: std::fmt::Display,
|
||||
{
|
||||
fn or_throw<'a, C: Context<'a>>(self, cx: &mut C) -> NeonResult<T> {
|
||||
match self {
|
||||
Ok(value) => Ok(value),
|
||||
Err(error) => cx.throw_error(error.to_string()),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -18,7 +18,6 @@ use std::ops::Deref;
|
||||
use std::sync::{Arc, Mutex};
|
||||
|
||||
use arrow_array::{Float32Array, RecordBatchIterator};
|
||||
use arrow_ipc::writer::FileWriter;
|
||||
use async_trait::async_trait;
|
||||
use futures::{TryFutureExt, TryStreamExt};
|
||||
use lance::dataset::{WriteMode, WriteParams};
|
||||
@@ -35,10 +34,12 @@ use vectordb::database::Database;
|
||||
use vectordb::error::Error;
|
||||
use vectordb::table::{ReadParams, Table};
|
||||
|
||||
use crate::arrow::arrow_buffer_to_record_batch;
|
||||
use crate::arrow::{arrow_buffer_to_record_batch, record_batch_to_buffer};
|
||||
use crate::error::ResultExt;
|
||||
|
||||
mod arrow;
|
||||
mod convert;
|
||||
mod error;
|
||||
mod index;
|
||||
|
||||
struct JsDatabase {
|
||||
@@ -86,7 +87,7 @@ fn runtime<'a, C: Context<'a>>(cx: &mut C) -> NeonResult<&'static Runtime> {
|
||||
|
||||
LOG.get_or_init(|| env_logger::init());
|
||||
|
||||
RUNTIME.get_or_try_init(|| Runtime::new().or_else(|err| cx.throw_error(err.to_string())))
|
||||
RUNTIME.get_or_try_init(|| Runtime::new().or_throw(cx))
|
||||
}
|
||||
|
||||
fn database_new(mut cx: FunctionContext) -> JsResult<JsPromise> {
|
||||
@@ -101,7 +102,7 @@ fn database_new(mut cx: FunctionContext) -> JsResult<JsPromise> {
|
||||
|
||||
deferred.settle_with(&channel, move |mut cx| {
|
||||
let db = JsDatabase {
|
||||
database: Arc::new(database.or_else(|err| cx.throw_error(err.to_string()))?),
|
||||
database: Arc::new(database.or_throw(&mut cx)?),
|
||||
};
|
||||
Ok(cx.boxed(db))
|
||||
});
|
||||
@@ -123,7 +124,7 @@ fn database_table_names(mut cx: FunctionContext) -> JsResult<JsPromise> {
|
||||
let tables_rst = database.table_names().await;
|
||||
|
||||
deferred.settle_with(&channel, move |mut cx| {
|
||||
let tables = tables_rst.or_else(|err| cx.throw_error(err.to_string()))?;
|
||||
let tables = tables_rst.or_throw(&mut cx)?;
|
||||
let table_names = convert::vec_str_to_array(&tables, &mut cx);
|
||||
table_names
|
||||
});
|
||||
@@ -194,9 +195,7 @@ fn database_open_table(mut cx: FunctionContext) -> JsResult<JsPromise> {
|
||||
let table_rst = database.open_table_with_params(&table_name, &params).await;
|
||||
|
||||
deferred.settle_with(&channel, move |mut cx| {
|
||||
let table = Arc::new(Mutex::new(
|
||||
table_rst.or_else(|err| cx.throw_error(err.to_string()))?,
|
||||
));
|
||||
let table = Arc::new(Mutex::new(table_rst.or_throw(&mut cx)?));
|
||||
Ok(cx.boxed(JsTable { table }))
|
||||
});
|
||||
});
|
||||
@@ -217,7 +216,7 @@ fn database_drop_table(mut cx: FunctionContext) -> JsResult<JsPromise> {
|
||||
rt.spawn(async move {
|
||||
let result = database.drop_table(&table_name).await;
|
||||
deferred.settle_with(&channel, move |mut cx| {
|
||||
result.or_else(|err| cx.throw_error(err.to_string()))?;
|
||||
result.or_throw(&mut cx)?;
|
||||
Ok(cx.null())
|
||||
});
|
||||
});
|
||||
@@ -282,26 +281,9 @@ fn table_search(mut cx: FunctionContext) -> JsResult<JsPromise> {
|
||||
.await;
|
||||
|
||||
deferred.settle_with(&channel, move |mut cx| {
|
||||
let results = results.or_else(|err| cx.throw_error(err.to_string()))?;
|
||||
let vector: Vec<u8> = Vec::new();
|
||||
|
||||
if results.is_empty() {
|
||||
return cx.buffer(0);
|
||||
}
|
||||
|
||||
let schema = results.get(0).unwrap().schema();
|
||||
let mut fr = FileWriter::try_new(vector, schema.deref())
|
||||
.or_else(|err| cx.throw_error(err.to_string()))?;
|
||||
|
||||
for batch in results.iter() {
|
||||
fr.write(batch)
|
||||
.or_else(|err| cx.throw_error(err.to_string()))?;
|
||||
}
|
||||
fr.finish().or_else(|err| cx.throw_error(err.to_string()))?;
|
||||
let buf = fr
|
||||
.into_inner()
|
||||
.or_else(|err| cx.throw_error(err.to_string()))?;
|
||||
Ok(JsBuffer::external(&mut cx, buf))
|
||||
let results = results.or_throw(&mut cx)?;
|
||||
let buffer = record_batch_to_buffer(results).or_throw(&mut cx)?;
|
||||
Ok(JsBuffer::external(&mut cx, buffer))
|
||||
});
|
||||
});
|
||||
Ok(promise)
|
||||
@@ -313,7 +295,7 @@ fn table_create(mut cx: FunctionContext) -> JsResult<JsPromise> {
|
||||
.downcast_or_throw::<JsBox<JsDatabase>, _>(&mut cx)?;
|
||||
let table_name = cx.argument::<JsString>(0)?.value(&mut cx);
|
||||
let buffer = cx.argument::<JsBuffer>(1)?;
|
||||
let batches = arrow_buffer_to_record_batch(buffer.as_slice(&mut cx));
|
||||
let batches = arrow_buffer_to_record_batch(buffer.as_slice(&mut cx)).or_throw(&mut cx)?;
|
||||
let schema = batches[0].schema();
|
||||
|
||||
// Write mode
|
||||
@@ -351,9 +333,7 @@ fn table_create(mut cx: FunctionContext) -> JsResult<JsPromise> {
|
||||
.await;
|
||||
|
||||
deferred.settle_with(&channel, move |mut cx| {
|
||||
let table = Arc::new(Mutex::new(
|
||||
table_rst.or_else(|err| cx.throw_error(err.to_string()))?,
|
||||
));
|
||||
let table = Arc::new(Mutex::new(table_rst.or_throw(&mut cx)?));
|
||||
Ok(cx.boxed(JsTable { table }))
|
||||
});
|
||||
});
|
||||
@@ -370,7 +350,8 @@ fn table_add(mut cx: FunctionContext) -> JsResult<JsPromise> {
|
||||
let js_table = cx.this().downcast_or_throw::<JsBox<JsTable>, _>(&mut cx)?;
|
||||
let buffer = cx.argument::<JsBuffer>(0)?;
|
||||
let write_mode = cx.argument::<JsString>(1)?.value(&mut cx);
|
||||
let batches = arrow_buffer_to_record_batch(buffer.as_slice(&mut cx));
|
||||
|
||||
let batches = arrow_buffer_to_record_batch(buffer.as_slice(&mut cx)).or_throw(&mut cx)?;
|
||||
let schema = batches[0].schema();
|
||||
|
||||
let rt = runtime(&mut cx)?;
|
||||
@@ -399,7 +380,7 @@ fn table_add(mut cx: FunctionContext) -> JsResult<JsPromise> {
|
||||
let add_result = table.lock().unwrap().add(batch_reader, Some(params)).await;
|
||||
|
||||
deferred.settle_with(&channel, move |mut cx| {
|
||||
let _added = add_result.or_else(|err| cx.throw_error(err.to_string()))?;
|
||||
let _added = add_result.or_throw(&mut cx)?;
|
||||
Ok(cx.boolean(true))
|
||||
});
|
||||
});
|
||||
@@ -418,7 +399,7 @@ fn table_count_rows(mut cx: FunctionContext) -> JsResult<JsPromise> {
|
||||
let num_rows_result = table.lock().unwrap().count_rows().await;
|
||||
|
||||
deferred.settle_with(&channel, move |mut cx| {
|
||||
let num_rows = num_rows_result.or_else(|err| cx.throw_error(err.to_string()))?;
|
||||
let num_rows = num_rows_result.or_throw(&mut cx)?;
|
||||
Ok(cx.number(num_rows as f64))
|
||||
});
|
||||
});
|
||||
@@ -438,7 +419,7 @@ fn table_delete(mut cx: FunctionContext) -> JsResult<JsPromise> {
|
||||
let delete_result = rt.block_on(async move { table.lock().unwrap().delete(&predicate).await });
|
||||
|
||||
deferred.settle_with(&channel, move |mut cx| {
|
||||
delete_result.or_else(|err| cx.throw_error(err.to_string()))?;
|
||||
delete_result.or_throw(&mut cx)?;
|
||||
Ok(cx.undefined())
|
||||
});
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "vectordb"
|
||||
version = "0.1.18"
|
||||
version = "0.1.19"
|
||||
edition = "2021"
|
||||
description = "Serverless, low-latency vector database for AI applications"
|
||||
license = "Apache-2.0"
|
||||
@@ -12,7 +12,7 @@ arrow-array = { workspace = true }
|
||||
arrow-data = { workspace = true }
|
||||
arrow-schema = { workspace = true }
|
||||
object_store = { workspace = true }
|
||||
snafu = "0.7.4"
|
||||
snafu = { workspace = true }
|
||||
half = { workspace = true }
|
||||
lance = { workspace = true }
|
||||
tokio = { version = "1.23", features = ["rt-multi-thread"] }
|
||||
|
||||
Reference in New Issue
Block a user