feat: add create_index to the async python API (#1052)

This also refactors the rust lancedb index builder API (and,
correspondingly, the nodejs API)
This commit is contained in:
Weston Pace
2024-03-12 05:17:05 -07:00
parent 90af5cf028
commit f822255683
38 changed files with 1329 additions and 766 deletions

View File

@@ -14,12 +14,10 @@ crate-type = ["cdylib"]
[dependencies]
arrow-ipc.workspace = true
futures.workspace = true
lance-linalg.workspace = true
lance.workspace = true
lancedb = { path = "../rust/lancedb" }
napi = { version = "2.15", default-features = false, features = [
"napi7",
"async"
"async",
] }
napi-derive = "2"

View File

@@ -27,6 +27,7 @@ import {
Float64,
} from "apache-arrow";
import { makeArrowTable } from "../dist/arrow";
import { Index } from "../dist/indices";
describe("Given a table", () => {
let tmpDir: tmp.DirResult;
@@ -67,19 +68,17 @@ describe("Given a table", () => {
});
});
describe("Test creating index", () => {
describe("When creating an index", () => {
let tmpDir: tmp.DirResult;
const schema = new Schema([
new Field("id", new Int32(), true),
new Field("vec", new FixedSizeList(32, new Field("item", new Float32()))),
]);
let tbl: Table;
let queryVec: number[];
beforeEach(() => {
beforeEach(async () => {
tmpDir = tmp.dirSync({ unsafeCleanup: true });
});
afterEach(() => tmpDir.removeCallback());
test("create vector index with no column", async () => {
const db = await connect(tmpDir.name);
const data = makeArrowTable(
Array(300)
@@ -94,8 +93,13 @@ describe("Test creating index", () => {
schema,
},
);
const tbl = await db.createTable("test", data);
await tbl.createIndex().build();
queryVec = data.toArray()[5].vec.toJSON();
tbl = await db.createTable("test", data);
});
afterEach(() => tmpDir.removeCallback());
it("should create a vector index on vector columns", async () => {
await tbl.createIndex("vec");
// check index directory
const indexDir = path.join(tmpDir.name, "test.lance", "_indices");
@@ -103,38 +107,47 @@ describe("Test creating index", () => {
// TODO: check index type.
// Search without specifying the column
const queryVector = data.toArray()[5].vec.toJSON();
const rst = await tbl.query().nearestTo(queryVector).limit(2).toArrow();
const rst = await tbl.query().nearestTo(queryVec).limit(2).toArrow();
expect(rst.numRows).toBe(2);
// Search with specifying the column
const rst2 = await tbl.search(queryVector, "vec").limit(2).toArrow();
const rst2 = await tbl.search(queryVec, "vec").limit(2).toArrow();
expect(rst2.numRows).toBe(2);
expect(rst.toString()).toEqual(rst2.toString());
});
test("no vector column available", async () => {
const db = await connect(tmpDir.name);
const tbl = await db.createTable(
"no_vec",
makeArrowTable([
{ id: 1, val: 2 },
{ id: 2, val: 3 },
]),
);
await expect(tbl.createIndex().build()).rejects.toThrow(
"No vector column found",
);
it("should allow parameters to be specified", async () => {
await tbl.createIndex("vec", {
config: Index.ivfPq({
numPartitions: 10,
}),
});
await tbl.createIndex("val").build();
const indexDir = path.join(tmpDir.name, "no_vec.lance", "_indices");
// TODO: Verify parameters when we can load index config as part of list indices
});
it("should allow me to replace (or not) an existing index", async () => {
await tbl.createIndex("id");
// Default is replace=true
await tbl.createIndex("id");
await expect(tbl.createIndex("id", { replace: false })).rejects.toThrow(
"already exists",
);
await tbl.createIndex("id", { replace: true });
});
test("should create a scalar index on scalar columns", async () => {
await tbl.createIndex("id");
const indexDir = path.join(tmpDir.name, "test.lance", "_indices");
expect(fs.readdirSync(indexDir)).toHaveLength(1);
for await (const r of tbl.query().filter("id > 1").select(["id"])) {
expect(r.numRows).toBe(1);
expect(r.numRows).toBe(298);
}
});
// TODO: Move this test to the query API test (making sure we can reject queries
// when the dimension is incorrect)
test("two columns with different dimensions", async () => {
const db = await connect(tmpDir.name);
const schema = new Schema([
@@ -164,14 +177,9 @@ describe("Test creating index", () => {
);
// Only build index over v1
await expect(tbl.createIndex().build()).rejects.toThrow(
/.*More than one vector columns found.*/,
);
tbl
.createIndex("vec")
// eslint-disable-next-line @typescript-eslint/naming-convention
.ivf_pq({ num_partitions: 2, num_sub_vectors: 2 })
.build();
await tbl.createIndex("vec", {
config: Index.ivfPq({ numPartitions: 2, numSubVectors: 2 }),
});
const rst = await tbl
.query()
@@ -205,30 +213,6 @@ describe("Test creating index", () => {
expect(rst64Query.toString()).toEqual(rst64Search.toString());
expect(rst64Query.numRows).toBe(2);
});
test("create scalar index", async () => {
const db = await connect(tmpDir.name);
const data = makeArrowTable(
Array(300)
.fill(1)
.map((_, i) => ({
id: i,
vec: Array(32)
.fill(1)
.map(() => Math.random()),
})),
{
schema,
},
);
const tbl = await db.createTable("test", data);
await tbl.createIndex("id").build();
// check index directory
const indexDir = path.join(tmpDir.name, "test.lance", "_indices");
expect(fs.readdirSync(indexDir)).toHaveLength(1);
// TODO: check index type.
});
});
describe("Read consistency interval", () => {

View File

@@ -18,15 +18,9 @@ import {
ConnectionOptions,
} from "./native.js";
export {
ConnectionOptions,
WriteOptions,
Query,
MetricType,
} from "./native.js";
export { Connection } from "./connection";
export { Table } from "./table";
export { IvfPQOptions, IndexBuilder } from "./indexer";
export { ConnectionOptions, WriteOptions, Query } from "./native.js";
export { Connection, CreateTableOptions } from "./connection";
export { Table, AddDataOptions } from "./table";
/**
* Connect to a LanceDB instance at the given URI.

View File

@@ -1,105 +0,0 @@
// Copyright 2024 Lance Developers.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// TODO: Re-enable this as part of https://github.com/lancedb/lancedb/pull/1052
/* eslint-disable @typescript-eslint/naming-convention */
import {
MetricType,
IndexBuilder as NativeBuilder,
Table as NativeTable,
} from "./native";
/** Options to create `IVF_PQ` index */
export interface IvfPQOptions {
/** Number of IVF partitions. */
num_partitions?: number;
/** Number of sub-vectors in PQ coding. */
num_sub_vectors?: number;
/** Number of bits used for each PQ code.
*/
num_bits?: number;
/** Metric type to calculate the distance between vectors.
*
* Supported metrics: `L2`, `Cosine` and `Dot`.
*/
metric_type?: MetricType;
/** Number of iterations to train K-means.
*
* Default is 50. The more iterations it usually yield better results,
* but it takes longer to train.
*/
max_iterations?: number;
sample_rate?: number;
}
/**
* Building an index on LanceDB {@link Table}
*
* @see {@link Table.createIndex} for detailed usage.
*/
export class IndexBuilder {
private inner: NativeBuilder;
constructor(tbl: NativeTable) {
this.inner = tbl.createIndex();
}
/** Instruct the builder to build an `IVF_PQ` index */
ivf_pq(options?: IvfPQOptions): IndexBuilder {
this.inner.ivfPq(
options?.metric_type,
options?.num_partitions,
options?.num_sub_vectors,
options?.num_bits,
options?.max_iterations,
options?.sample_rate,
);
return this;
}
/** Instruct the builder to build a Scalar index. */
scalar(): IndexBuilder {
this.scalar();
return this;
}
/** Set the column(s) to create index on top of. */
column(col: string): IndexBuilder {
this.inner.column(col);
return this;
}
/** Set to true to replace existing index. */
replace(val: boolean): IndexBuilder {
this.inner.replace(val);
return this;
}
/** Specify the name of the index. Optional */
name(n: string): IndexBuilder {
this.inner.name(n);
return this;
}
/** Building the index. */
async build() {
await this.inner.build();
}
}

195
nodejs/lancedb/indices.ts Normal file
View File

@@ -0,0 +1,195 @@
// Copyright 2024 Lance Developers.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
import { Index as LanceDbIndex } from "./native";
/**
* Options to create an `IVF_PQ` index
*/
export interface IvfPqOptions {
/** The number of IVF partitions to create.
*
* This value should generally scale with the number of rows in the dataset.
* By default the number of partitions is the square root of the number of
* rows.
*
* If this value is too large then the first part of the search (picking the
* right partition) will be slow. If this value is too small then the second
* part of the search (searching within a partition) will be slow.
*/
numPartitions?: number;
/** Number of sub-vectors of PQ.
*
* This value controls how much the vector is compressed during the quantization step.
* The more sub vectors there are the less the vector is compressed. The default is
* the dimension of the vector divided by 16. If the dimension is not evenly divisible
* by 16 we use the dimension divded by 8.
*
* The above two cases are highly preferred. Having 8 or 16 values per subvector allows
* us to use efficient SIMD instructions.
*
* If the dimension is not visible by 8 then we use 1 subvector. This is not ideal and
* will likely result in poor performance.
*/
numSubVectors?: number;
/** [DistanceType] to use to build the index.
*
* Default value is [DistanceType::L2].
*
* This is used when training the index to calculate the IVF partitions
* (vectors are grouped in partitions with similar vectors according to this
* distance type) and to calculate a subvector's code during quantization.
*
* The distance type used to train an index MUST match the distance type used
* to search the index. Failure to do so will yield inaccurate results.
*
* The following distance types are available:
*
* "l2" - Euclidean distance. This is a very common distance metric that
* accounts for both magnitude and direction when determining the distance
* between vectors. L2 distance has a range of [0, ∞).
*
* "cosine" - Cosine distance. Cosine distance is a distance metric
* calculated from the cosine similarity between two vectors. Cosine
* similarity is a measure of similarity between two non-zero vectors of an
* inner product space. It is defined to equal the cosine of the angle
* between them. Unlike L2, the cosine distance is not affected by the
* magnitude of the vectors. Cosine distance has a range of [0, 2].
*
* Note: the cosine distance is undefined when one (or both) of the vectors
* are all zeros (there is no direction). These vectors are invalid and may
* never be returned from a vector search.
*
* "dot" - Dot product. Dot distance is the dot product of two vectors. Dot
* distance has a range of (-∞, ∞). If the vectors are normalized (i.e. their
* L2 norm is 1), then dot distance is equivalent to the cosine distance.
*/
distanceType?: "l2" | "cosine" | "dot";
/** Max iteration to train IVF kmeans.
*
* When training an IVF PQ index we use kmeans to calculate the partitions. This parameter
* controls how many iterations of kmeans to run.
*
* Increasing this might improve the quality of the index but in most cases these extra
* iterations have diminishing returns.
*
* The default value is 50.
*/
maxIterations?: number;
/** The number of vectors, per partition, to sample when training IVF kmeans.
*
* When an IVF PQ index is trained, we need to calculate partitions. These are groups
* of vectors that are similar to each other. To do this we use an algorithm called kmeans.
*
* Running kmeans on a large dataset can be slow. To speed this up we run kmeans on a
* random sample of the data. This parameter controls the size of the sample. The total
* number of vectors used to train the index is `sample_rate * num_partitions`.
*
* Increasing this value might improve the quality of the index but in most cases the
* default should be sufficient.
*
* The default value is 256.
*/
sampleRate?: number;
}
export class Index {
private readonly inner: LanceDbIndex;
private constructor(inner: LanceDbIndex) {
this.inner = inner;
}
/**
* Create an IvfPq index
*
* This index stores a compressed (quantized) copy of every vector. These vectors
* are grouped into partitions of similar vectors. Each partition keeps track of
* a centroid which is the average value of all vectors in the group.
*
* During a query the centroids are compared with the query vector to find the closest
* partitions. The compressed vectors in these partitions are then searched to find
* the closest vectors.
*
* The compression scheme is called product quantization. Each vector is divided into
* subvectors and then each subvector is quantized into a small number of bits. the
* parameters `num_bits` and `num_subvectors` control this process, providing a tradeoff
* between index size (and thus search speed) and index accuracy.
*
* The partitioning process is called IVF and the `num_partitions` parameter controls how
* many groups to create.
*
* Note that training an IVF PQ index on a large dataset is a slow operation and
* currently is also a memory intensive operation.
*/
static ivfPq(options?: Partial<IvfPqOptions>) {
return new Index(
LanceDbIndex.ivfPq(
options?.distanceType,
options?.numPartitions,
options?.numSubVectors,
options?.maxIterations,
options?.sampleRate,
),
);
}
/** Create a btree index
*
* A btree index is an index on a scalar columns. The index stores a copy of the column
* in sorted order. A header entry is created for each block of rows (currently the
* block size is fixed at 4096). These header entries are stored in a separate
* cacheable structure (a btree). To search for data the header is used to determine
* which blocks need to be read from disk.
*
* For example, a btree index in a table with 1Bi rows requires sizeof(Scalar) * 256Ki
* bytes of memory and will generally need to read sizeof(Scalar) * 4096 bytes to find
* the correct row ids.
*
* This index is good for scalar columns with mostly distinct values and does best when
* the query is highly selective.
*
* The btree index does not currently have any parameters though parameters such as the
* block size may be added in the future.
*/
static btree() {
return new Index(LanceDbIndex.btree());
}
}
export interface IndexOptions {
/** Advanced index configuration
*
* This option allows you to specify a specfic index to create and also
* allows you to pass in configuration for training the index.
*
* See the static methods on Index for details on the various index types.
*
* If this is not supplied then column data type(s) and column statistics
* will be used to determine the most useful kind of index to create.
*/
config?: Index;
/** Whether to replace the existing index
*
* If this is false, and another index already exists on the same columns
* and the same name, then an error will be returned. This is true even if
* that index is out of date.
*
* The default is true
*/
replace?: boolean;
}

View File

@@ -3,15 +3,6 @@
/* auto-generated by NAPI-RS */
export const enum IndexType {
Scalar = 0,
IvfPq = 1
}
export const enum MetricType {
L2 = 0,
Cosine = 1,
Dot = 2
}
/**
* A definition of a column alteration. The alteration changes the column at
* `path` to have the new name `name`, to be nullable if `nullable` is true,
@@ -93,13 +84,9 @@ export class Connection {
/** Drop table with the name. Or raise an error if the table does not exist. */
dropTable(name: string): Promise<void>
}
export class IndexBuilder {
replace(v: boolean): void
column(c: string): void
name(name: string): void
ivfPq(metricType?: MetricType | undefined | null, numPartitions?: number | undefined | null, numSubVectors?: number | undefined | null, numBits?: number | undefined | null, maxIterations?: number | undefined | null, sampleRate?: number | undefined | null): void
scalar(): void
build(): Promise<void>
export class Index {
static ivfPq(distanceType?: string | undefined | null, numPartitions?: number | undefined | null, numSubVectors?: number | undefined | null, maxIterations?: number | undefined | null, sampleRate?: number | undefined | null): Index
static btree(): Index
}
/** Typescript-style Async Iterator over RecordBatches */
export class RecordBatchIterator {
@@ -125,7 +112,7 @@ export class Table {
add(buf: Buffer, mode: string): Promise<void>
countRows(filter?: string | undefined | null): Promise<number>
delete(predicate: string): Promise<void>
createIndex(): IndexBuilder
createIndex(index: Index | undefined | null, column: string, replace?: boolean | undefined | null): Promise<void>
query(): Query
addColumns(transforms: Array<AddColumnsSql>): Promise<void>
alterColumns(alterations: Array<ColumnAlteration>): Promise<void>

View File

@@ -295,12 +295,10 @@ if (!nativeBinding) {
throw new Error(`Failed to load native binding`)
}
const { Connection, IndexType, MetricType, IndexBuilder, RecordBatchIterator, Query, Table, WriteMode, connect } = nativeBinding
const { Connection, Index, RecordBatchIterator, Query, Table, WriteMode, connect } = nativeBinding
module.exports.Connection = Connection
module.exports.IndexType = IndexType
module.exports.MetricType = MetricType
module.exports.IndexBuilder = IndexBuilder
module.exports.Index = Index
module.exports.RecordBatchIterator = RecordBatchIterator
module.exports.Query = Query
module.exports.Table = Table

View File

@@ -19,7 +19,7 @@ import {
Table as _NativeTable,
} from "./native";
import { Query } from "./query";
import { IndexBuilder } from "./indexer";
import { IndexOptions } from "./indices";
import { Data, fromDataToBuffer } from "./arrow";
/**
@@ -103,24 +103,28 @@ export class Table {
await this.inner.delete(predicate);
}
/** Create an index over the columns.
/** Create an index to speed up queries.
*
* @param {string} column The column to create the index on. If not specified,
* it will create an index on vector field.
* Indices can be created on vector columns or scalar columns.
* Indices on vector columns will speed up vector searches.
* Indices on scalar columns will speed up filtering (in both
* vector and non-vector searches)
*
* @example
*
* By default, it creates vector idnex on one vector column.
* If the column has a vector (fixed size list) data type then
* an IvfPq vector index will be created.
*
* ```typescript
* const table = await conn.openTable("my_table");
* await table.createIndex().build();
* await table.createIndex(["vector"]);
* ```
*
* You can specify `IVF_PQ` parameters via `ivf_pq({})` call.
* For advanced control over vector index creation you can specify
* the index type and options.
* ```typescript
* const table = await conn.openTable("my_table");
* await table.createIndex("my_vec_col")
* await table.createIndex(["vector"], I)
* .ivf_pq({ num_partitions: 128, num_sub_vectors: 16 })
* .build();
* ```
@@ -131,12 +135,11 @@ export class Table {
* await table.createIndex("my_float_col").build();
* ```
*/
createIndex(column?: string): IndexBuilder {
let builder = new IndexBuilder(this.inner);
if (column !== undefined) {
builder = builder.column(column);
}
return builder;
async createIndex(column: string, options?: Partial<IndexOptions>) {
// Bit of a hack to get around the fact that TS has no package-scope.
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const nativeIndex = (options?.config as any)?.inner;
await this.inner.createIndex(nativeIndex, column, options?.replace);
}
/**

12
nodejs/src/error.rs Normal file
View File

@@ -0,0 +1,12 @@
pub type Result<T> = napi::Result<T>;
pub trait NapiErrorExt<T> {
/// Convert to a napi error using from_reason(err.to_string())
fn default_error(self) -> Result<T>;
}
impl<T> NapiErrorExt<T> for std::result::Result<T, lancedb::Error> {
fn default_error(self) -> Result<T> {
self.map_err(|err| napi::Error::from_reason(err.to_string()))
}
}

View File

@@ -14,126 +14,73 @@
use std::sync::Mutex;
use lance_linalg::distance::MetricType as LanceMetricType;
use lancedb::index::IndexBuilder as LanceDbIndexBuilder;
use lancedb::Table as LanceDbTable;
use lancedb::index::scalar::BTreeIndexBuilder;
use lancedb::index::vector::IvfPqIndexBuilder;
use lancedb::index::Index as LanceDbIndex;
use lancedb::DistanceType;
use napi_derive::napi;
#[napi]
pub enum IndexType {
Scalar,
IvfPq,
pub struct Index {
inner: Mutex<Option<LanceDbIndex>>,
}
#[napi]
pub enum MetricType {
L2,
Cosine,
Dot,
}
impl From<MetricType> for LanceMetricType {
fn from(metric: MetricType) -> Self {
match metric {
MetricType::L2 => Self::L2,
MetricType::Cosine => Self::Cosine,
MetricType::Dot => Self::Dot,
}
impl Index {
pub fn consume(&self) -> napi::Result<LanceDbIndex> {
self.inner
.lock()
.unwrap()
.take()
.ok_or(napi::Error::from_reason(
"attempt to use an index more than once",
))
}
}
#[napi]
pub struct IndexBuilder {
inner: Mutex<Option<LanceDbIndexBuilder>>,
}
impl IndexBuilder {
fn modify(
&self,
mod_fn: impl Fn(LanceDbIndexBuilder) -> LanceDbIndexBuilder,
) -> napi::Result<()> {
let mut inner = self.inner.lock().unwrap();
let inner_builder = inner.take().ok_or_else(|| {
napi::Error::from_reason("IndexBuilder has already been consumed".to_string())
})?;
let inner_builder = mod_fn(inner_builder);
inner.replace(inner_builder);
Ok(())
}
}
#[napi]
impl IndexBuilder {
pub fn new(tbl: &LanceDbTable) -> Self {
let inner = tbl.create_index(&[]);
Self {
inner: Mutex::new(Some(inner)),
}
}
#[napi]
pub fn replace(&self, v: bool) -> napi::Result<()> {
self.modify(|b| b.replace(v))
}
#[napi]
pub fn column(&self, c: String) -> napi::Result<()> {
self.modify(|b| b.columns(&[c.as_str()]))
}
#[napi]
pub fn name(&self, name: String) -> napi::Result<()> {
self.modify(|b| b.name(name.as_str()))
}
#[napi]
impl Index {
#[napi(factory)]
pub fn ivf_pq(
&self,
metric_type: Option<MetricType>,
distance_type: Option<String>,
num_partitions: Option<u32>,
num_sub_vectors: Option<u32>,
num_bits: Option<u32>,
max_iterations: Option<u32>,
sample_rate: Option<u32>,
) -> napi::Result<()> {
self.modify(|b| {
let mut b = b.ivf_pq();
if let Some(metric_type) = metric_type {
b = b.metric_type(metric_type.into());
}
if let Some(num_partitions) = num_partitions {
b = b.num_partitions(num_partitions);
}
if let Some(num_sub_vectors) = num_sub_vectors {
b = b.num_sub_vectors(num_sub_vectors);
}
if let Some(num_bits) = num_bits {
b = b.num_bits(num_bits);
}
if let Some(max_iterations) = max_iterations {
b = b.max_iterations(max_iterations);
}
if let Some(sample_rate) = sample_rate {
b = b.sample_rate(sample_rate);
}
b
) -> napi::Result<Self> {
let mut ivf_pq_builder = IvfPqIndexBuilder::default();
if let Some(distance_type) = distance_type {
let distance_type = match distance_type.as_str() {
"l2" => Ok(DistanceType::L2),
"cosine" => Ok(DistanceType::Cosine),
"dot" => Ok(DistanceType::Dot),
_ => Err(napi::Error::from_reason(format!(
"Invalid distance type '{}'. Must be one of l2, cosine, or dot",
distance_type
))),
}?;
ivf_pq_builder = ivf_pq_builder.distance_type(distance_type);
}
if let Some(num_partitions) = num_partitions {
ivf_pq_builder = ivf_pq_builder.num_partitions(num_partitions);
}
if let Some(num_sub_vectors) = num_sub_vectors {
ivf_pq_builder = ivf_pq_builder.num_sub_vectors(num_sub_vectors);
}
if let Some(max_iterations) = max_iterations {
ivf_pq_builder = ivf_pq_builder.max_iterations(max_iterations);
}
if let Some(sample_rate) = sample_rate {
ivf_pq_builder = ivf_pq_builder.sample_rate(sample_rate);
}
Ok(Self {
inner: Mutex::new(Some(LanceDbIndex::IvfPq(ivf_pq_builder))),
})
}
#[napi]
pub fn scalar(&self) -> napi::Result<()> {
self.modify(|b| b.scalar())
}
#[napi]
pub async fn build(&self) -> napi::Result<()> {
let inner = self.inner.lock().unwrap().take().ok_or_else(|| {
napi::Error::from_reason("IndexBuilder has already been consumed".to_string())
})?;
inner
.build()
.await
.map_err(|e| napi::Error::from_reason(format!("Failed to build index: {}", e)))?;
Ok(())
#[napi(factory)]
pub fn btree() -> Self {
Self {
inner: Mutex::new(Some(LanceDbIndex::BTree(BTreeIndexBuilder::default()))),
}
}
}

View File

@@ -13,7 +13,7 @@
// limitations under the License.
use futures::StreamExt;
use lance::io::RecordBatchStream;
use lancedb::arrow::SendableRecordBatchStream;
use lancedb::ipc::batches_to_ipc_file;
use napi::bindgen_prelude::*;
use napi_derive::napi;
@@ -21,12 +21,12 @@ use napi_derive::napi;
/** Typescript-style Async Iterator over RecordBatches */
#[napi]
pub struct RecordBatchIterator {
inner: Box<dyn RecordBatchStream + Unpin>,
inner: SendableRecordBatchStream,
}
#[napi]
impl RecordBatchIterator {
pub(crate) fn new(inner: Box<dyn RecordBatchStream + Unpin>) -> Self {
pub(crate) fn new(inner: SendableRecordBatchStream) -> Self {
Self { inner }
}

View File

@@ -16,6 +16,7 @@ use connection::Connection;
use napi_derive::*;
mod connection;
mod error;
mod index;
mod iterator;
mod query;

View File

@@ -74,6 +74,6 @@ impl Query {
let inner_stream = self.inner.execute_stream().await.map_err(|e| {
napi::Error::from_reason(format!("Failed to execute query stream: {}", e))
})?;
Ok(RecordBatchIterator::new(Box::new(inner_stream)))
Ok(RecordBatchIterator::new(inner_stream))
}
}

View File

@@ -13,13 +13,16 @@
// limitations under the License.
use arrow_ipc::writer::FileWriter;
use lance::dataset::ColumnAlteration as LanceColumnAlteration;
use lancedb::ipc::ipc_file_to_batches;
use lancedb::table::{AddDataMode, Table as LanceDbTable};
use lancedb::table::{
AddDataMode, ColumnAlteration as LanceColumnAlteration, NewColumnTransform,
Table as LanceDbTable,
};
use napi::bindgen_prelude::*;
use napi_derive::napi;
use crate::index::IndexBuilder;
use crate::error::NapiErrorExt;
use crate::index::Index;
use crate::query::Query;
#[napi]
@@ -129,8 +132,22 @@ impl Table {
}
#[napi]
pub fn create_index(&self) -> napi::Result<IndexBuilder> {
Ok(IndexBuilder::new(self.inner_ref()?))
pub async fn create_index(
&self,
index: Option<&Index>,
column: String,
replace: Option<bool>,
) -> napi::Result<()> {
let lancedb_index = if let Some(index) = index {
index.consume()?
} else {
lancedb::index::Index::Auto
};
let mut builder = self.inner_ref()?.create_index(&[column], lancedb_index);
if let Some(replace) = replace {
builder = builder.replace(replace);
}
builder.execute().await.default_error()
}
#[napi]
@@ -144,7 +161,7 @@ impl Table {
.into_iter()
.map(|sql| (sql.name, sql.value_sql))
.collect::<Vec<_>>();
let transforms = lance::dataset::NewColumnTransform::SqlExpressions(transforms);
let transforms = NewColumnTransform::SqlExpressions(transforms);
self.inner_ref()?
.add_columns(transforms, None)
.await