mirror of
https://github.com/lancedb/lancedb.git
synced 2026-01-06 03:42:57 +00:00
Compare commits
3 Commits
lance-13.1
...
ayush/pyar
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
40ffe03cc8 | ||
|
|
617ce3139b | ||
|
|
242bbe1897 |
@@ -1,5 +1,5 @@
|
|||||||
[tool.bumpversion]
|
[tool.bumpversion]
|
||||||
current_version = "0.6.0"
|
current_version = "0.5.2"
|
||||||
parse = """(?x)
|
parse = """(?x)
|
||||||
(?P<major>0|[1-9]\\d*)\\.
|
(?P<major>0|[1-9]\\d*)\\.
|
||||||
(?P<minor>0|[1-9]\\d*)\\.
|
(?P<minor>0|[1-9]\\d*)\\.
|
||||||
|
|||||||
19
Cargo.toml
19
Cargo.toml
@@ -20,18 +20,13 @@ keywords = ["lancedb", "lance", "database", "vector", "search"]
|
|||||||
categories = ["database-implementations"]
|
categories = ["database-implementations"]
|
||||||
|
|
||||||
[workspace.dependencies]
|
[workspace.dependencies]
|
||||||
# lance = { "version" = "=0.13.0", "features" = ["dynamodb"] }
|
lance = { "version" = "=0.12.2", "features" = [
|
||||||
# lance-index = { "version" = "=0.13.0" }
|
"dynamodb",
|
||||||
# lance-linalg = { "version" = "=0.13.0" }
|
], git = "https://github.com/lancedb/lance.git", tag = "v0.12.2-beta.2" }
|
||||||
# lance-testing = { "version" = "=0.13.0" }
|
lance-index = { "version" = "=0.12.2", git = "https://github.com/lancedb/lance.git", tag = "v0.12.2-beta.2" }
|
||||||
# lance-datafusion = { "version" = "=0.13.0" }
|
lance-linalg = { "version" = "=0.12.2", git = "https://github.com/lancedb/lance.git", tag = "v0.12.2-beta.2" }
|
||||||
|
lance-testing = { "version" = "=0.12.2", git = "https://github.com/lancedb/lance.git", tag = "v0.12.2-beta.2" }
|
||||||
lance = { path = "../lance/rust/lance" }
|
lance-datafusion = { "version" = "=0.12.2", git = "https://github.com/lancedb/lance.git", tag = "v0.12.2-beta.2" }
|
||||||
lance-index = { path = "../lance/rust/lance-index" }
|
|
||||||
lance-linalg= { path = "../lance/rust/lance-linalg" }
|
|
||||||
lance-testing = { path = "../lance/rust/lance-testing" }
|
|
||||||
lance-datafusion = { path = "../lance/rust/lance-datafusion" }
|
|
||||||
|
|
||||||
# Note that this one does not include pyarrow
|
# Note that this one does not include pyarrow
|
||||||
arrow = { version = "51.0", optional = false }
|
arrow = { version = "51.0", optional = false }
|
||||||
arrow-array = "51.0"
|
arrow-array = "51.0"
|
||||||
|
|||||||
@@ -116,21 +116,21 @@ This guide will show how to create tables, insert data into them, and update the
|
|||||||
|
|
||||||
### From a Polars DataFrame
|
### From a Polars DataFrame
|
||||||
|
|
||||||
LanceDB supports [Polars](https://pola.rs/), a modern, fast DataFrame library
|
LanceDB supports [Polars](https://pola.rs/), a modern, fast DataFrame library
|
||||||
written in Rust. Just like in Pandas, the Polars integration is enabled by PyArrow
|
written in Rust. Just like in Pandas, the Polars integration is enabled by PyArrow
|
||||||
under the hood. A deeper integration between LanceDB Tables and Polars DataFrames
|
under the hood. A deeper integration between LanceDB Tables and Polars DataFrames
|
||||||
is on the way.
|
is on the way.
|
||||||
|
|
||||||
```python
|
```python
|
||||||
import polars as pl
|
import polars as pl
|
||||||
|
|
||||||
data = pl.DataFrame({
|
data = pl.DataFrame({
|
||||||
"vector": [[3.1, 4.1], [5.9, 26.5]],
|
"vector": [[3.1, 4.1], [5.9, 26.5]],
|
||||||
"item": ["foo", "bar"],
|
"item": ["foo", "bar"],
|
||||||
"price": [10.0, 20.0]
|
"price": [10.0, 20.0]
|
||||||
})
|
})
|
||||||
table = db.create_table("pl_table", data=data)
|
table = db.create_table("pl_table", data=data)
|
||||||
```
|
```
|
||||||
|
|
||||||
### From an Arrow Table
|
### From an Arrow Table
|
||||||
=== "Python"
|
=== "Python"
|
||||||
|
|||||||
4
node/package-lock.json
generated
4
node/package-lock.json
generated
@@ -1,12 +1,12 @@
|
|||||||
{
|
{
|
||||||
"name": "vectordb",
|
"name": "vectordb",
|
||||||
"version": "0.6.0",
|
"version": "0.5.2",
|
||||||
"lockfileVersion": 3,
|
"lockfileVersion": 3,
|
||||||
"requires": true,
|
"requires": true,
|
||||||
"packages": {
|
"packages": {
|
||||||
"": {
|
"": {
|
||||||
"name": "vectordb",
|
"name": "vectordb",
|
||||||
"version": "0.6.0",
|
"version": "0.5.2",
|
||||||
"cpu": [
|
"cpu": [
|
||||||
"x64",
|
"x64",
|
||||||
"arm64"
|
"arm64"
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "vectordb",
|
"name": "vectordb",
|
||||||
"version": "0.6.0",
|
"version": "0.5.2",
|
||||||
"description": " Serverless, low-latency vector database for AI applications",
|
"description": " Serverless, low-latency vector database for AI applications",
|
||||||
"main": "dist/index.js",
|
"main": "dist/index.js",
|
||||||
"types": "dist/index.d.ts",
|
"types": "dist/index.d.ts",
|
||||||
|
|||||||
@@ -39,9 +39,7 @@ describe.each([arrow, arrowOld])("Given a table", (arrow: any) => {
|
|||||||
let tmpDir: tmp.DirResult;
|
let tmpDir: tmp.DirResult;
|
||||||
let table: Table;
|
let table: Table;
|
||||||
|
|
||||||
const schema:
|
const schema = new arrow.Schema([
|
||||||
| import("apache-arrow").Schema
|
|
||||||
| import("apache-arrow-old").Schema = new arrow.Schema([
|
|
||||||
new arrow.Field("id", new arrow.Float64(), true),
|
new arrow.Field("id", new arrow.Float64(), true),
|
||||||
]);
|
]);
|
||||||
|
|
||||||
@@ -317,7 +315,7 @@ describe("When creating an index", () => {
|
|||||||
.query()
|
.query()
|
||||||
.limit(2)
|
.limit(2)
|
||||||
.nearestTo(queryVec)
|
.nearestTo(queryVec)
|
||||||
.distanceType("dot")
|
.distanceType("DoT")
|
||||||
.toArrow();
|
.toArrow();
|
||||||
expect(rst.numRows).toBe(2);
|
expect(rst.numRows).toBe(2);
|
||||||
|
|
||||||
|
|||||||
@@ -15,7 +15,6 @@
|
|||||||
import {
|
import {
|
||||||
Table as ArrowTable,
|
Table as ArrowTable,
|
||||||
Binary,
|
Binary,
|
||||||
BufferType,
|
|
||||||
DataType,
|
DataType,
|
||||||
Field,
|
Field,
|
||||||
FixedSizeBinary,
|
FixedSizeBinary,
|
||||||
@@ -38,68 +37,14 @@ import {
|
|||||||
type makeTable,
|
type makeTable,
|
||||||
vectorFromArray,
|
vectorFromArray,
|
||||||
} from "apache-arrow";
|
} from "apache-arrow";
|
||||||
import { Buffers } from "apache-arrow/data";
|
|
||||||
import { type EmbeddingFunction } from "./embedding/embedding_function";
|
import { type EmbeddingFunction } from "./embedding/embedding_function";
|
||||||
import { EmbeddingFunctionConfig, getRegistry } from "./embedding/registry";
|
import { EmbeddingFunctionConfig, getRegistry } from "./embedding/registry";
|
||||||
import {
|
import { sanitizeField, sanitizeSchema, sanitizeType } from "./sanitize";
|
||||||
sanitizeField,
|
|
||||||
sanitizeSchema,
|
|
||||||
sanitizeTable,
|
|
||||||
sanitizeType,
|
|
||||||
} from "./sanitize";
|
|
||||||
export * from "apache-arrow";
|
export * from "apache-arrow";
|
||||||
export type SchemaLike =
|
|
||||||
| Schema
|
|
||||||
| {
|
|
||||||
fields: FieldLike[];
|
|
||||||
metadata: Map<string, string>;
|
|
||||||
get names(): unknown[];
|
|
||||||
};
|
|
||||||
export type FieldLike =
|
|
||||||
| Field
|
|
||||||
| {
|
|
||||||
type: string;
|
|
||||||
name: string;
|
|
||||||
nullable?: boolean;
|
|
||||||
metadata?: Map<string, string>;
|
|
||||||
};
|
|
||||||
|
|
||||||
export type DataLike =
|
|
||||||
// biome-ignore lint/suspicious/noExplicitAny: <explanation>
|
|
||||||
| import("apache-arrow").Data<Struct<any>>
|
|
||||||
| {
|
|
||||||
// biome-ignore lint/suspicious/noExplicitAny: <explanation>
|
|
||||||
type: any;
|
|
||||||
length: number;
|
|
||||||
offset: number;
|
|
||||||
stride: number;
|
|
||||||
nullable: boolean;
|
|
||||||
children: DataLike[];
|
|
||||||
get nullCount(): number;
|
|
||||||
// biome-ignore lint/suspicious/noExplicitAny: <explanation>
|
|
||||||
values: Buffers<any>[BufferType.DATA];
|
|
||||||
// biome-ignore lint/suspicious/noExplicitAny: <explanation>
|
|
||||||
typeIds: Buffers<any>[BufferType.TYPE];
|
|
||||||
// biome-ignore lint/suspicious/noExplicitAny: <explanation>
|
|
||||||
nullBitmap: Buffers<any>[BufferType.VALIDITY];
|
|
||||||
// biome-ignore lint/suspicious/noExplicitAny: <explanation>
|
|
||||||
valueOffsets: Buffers<any>[BufferType.OFFSET];
|
|
||||||
};
|
|
||||||
|
|
||||||
export type RecordBatchLike =
|
|
||||||
| RecordBatch
|
|
||||||
| {
|
|
||||||
schema: SchemaLike;
|
|
||||||
data: DataLike;
|
|
||||||
};
|
|
||||||
|
|
||||||
export type TableLike =
|
|
||||||
| ArrowTable
|
|
||||||
| { schema: SchemaLike; batches: RecordBatchLike[] };
|
|
||||||
|
|
||||||
export type IntoVector = Float32Array | Float64Array | number[];
|
export type IntoVector = Float32Array | Float64Array | number[];
|
||||||
|
|
||||||
export function isArrowTable(value: object): value is TableLike {
|
export function isArrowTable(value: object): value is ArrowTable {
|
||||||
if (value instanceof ArrowTable) return true;
|
if (value instanceof ArrowTable) return true;
|
||||||
return "schema" in value && "batches" in value;
|
return "schema" in value && "batches" in value;
|
||||||
}
|
}
|
||||||
@@ -190,7 +135,7 @@ export function isFixedSizeList(value: unknown): value is FixedSizeList {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/** Data type accepted by NodeJS SDK */
|
/** Data type accepted by NodeJS SDK */
|
||||||
export type Data = Record<string, unknown>[] | TableLike;
|
export type Data = Record<string, unknown>[] | ArrowTable;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Options to control how a column should be converted to a vector array
|
* Options to control how a column should be converted to a vector array
|
||||||
@@ -217,7 +162,7 @@ export class MakeArrowTableOptions {
|
|||||||
* The schema must be specified if there are no records (e.g. to make
|
* The schema must be specified if there are no records (e.g. to make
|
||||||
* an empty table)
|
* an empty table)
|
||||||
*/
|
*/
|
||||||
schema?: SchemaLike;
|
schema?: Schema;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Mapping from vector column name to expected type
|
* Mapping from vector column name to expected type
|
||||||
@@ -365,7 +310,7 @@ export function makeArrowTable(
|
|||||||
if (opt.schema !== undefined && opt.schema !== null) {
|
if (opt.schema !== undefined && opt.schema !== null) {
|
||||||
opt.schema = sanitizeSchema(opt.schema);
|
opt.schema = sanitizeSchema(opt.schema);
|
||||||
opt.schema = validateSchemaEmbeddings(
|
opt.schema = validateSchemaEmbeddings(
|
||||||
opt.schema as Schema,
|
opt.schema,
|
||||||
data,
|
data,
|
||||||
options?.embeddingFunction,
|
options?.embeddingFunction,
|
||||||
);
|
);
|
||||||
@@ -449,7 +394,7 @@ export function makeArrowTable(
|
|||||||
// `new ArrowTable(schema, batches)` which does not do any schema inference
|
// `new ArrowTable(schema, batches)` which does not do any schema inference
|
||||||
const firstTable = new ArrowTable(columns);
|
const firstTable = new ArrowTable(columns);
|
||||||
const batchesFixed = firstTable.batches.map(
|
const batchesFixed = firstTable.batches.map(
|
||||||
(batch) => new RecordBatch(opt.schema as Schema, batch.data),
|
(batch) => new RecordBatch(opt.schema!, batch.data),
|
||||||
);
|
);
|
||||||
let schema: Schema;
|
let schema: Schema;
|
||||||
if (metadata !== undefined) {
|
if (metadata !== undefined) {
|
||||||
@@ -462,9 +407,9 @@ export function makeArrowTable(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
schema = new Schema(opt.schema.fields as Field[], schemaMetadata);
|
schema = new Schema(opt.schema.fields, schemaMetadata);
|
||||||
} else {
|
} else {
|
||||||
schema = opt.schema as Schema;
|
schema = opt.schema;
|
||||||
}
|
}
|
||||||
return new ArrowTable(schema, batchesFixed);
|
return new ArrowTable(schema, batchesFixed);
|
||||||
}
|
}
|
||||||
@@ -480,7 +425,7 @@ export function makeArrowTable(
|
|||||||
* Create an empty Arrow table with the provided schema
|
* Create an empty Arrow table with the provided schema
|
||||||
*/
|
*/
|
||||||
export function makeEmptyTable(
|
export function makeEmptyTable(
|
||||||
schema: SchemaLike,
|
schema: Schema,
|
||||||
metadata?: Map<string, string>,
|
metadata?: Map<string, string>,
|
||||||
): ArrowTable {
|
): ArrowTable {
|
||||||
return makeArrowTable([], { schema }, metadata);
|
return makeArrowTable([], { schema }, metadata);
|
||||||
@@ -618,17 +563,18 @@ async function applyEmbeddingsFromMetadata(
|
|||||||
async function applyEmbeddings<T>(
|
async function applyEmbeddings<T>(
|
||||||
table: ArrowTable,
|
table: ArrowTable,
|
||||||
embeddings?: EmbeddingFunctionConfig,
|
embeddings?: EmbeddingFunctionConfig,
|
||||||
schema?: SchemaLike,
|
schema?: Schema,
|
||||||
): Promise<ArrowTable> {
|
): Promise<ArrowTable> {
|
||||||
if (schema !== undefined && schema !== null) {
|
|
||||||
schema = sanitizeSchema(schema);
|
|
||||||
}
|
|
||||||
if (schema?.metadata.has("embedding_functions")) {
|
if (schema?.metadata.has("embedding_functions")) {
|
||||||
return applyEmbeddingsFromMetadata(table, schema! as Schema);
|
return applyEmbeddingsFromMetadata(table, schema!);
|
||||||
} else if (embeddings == null || embeddings === undefined) {
|
} else if (embeddings == null || embeddings === undefined) {
|
||||||
return table;
|
return table;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (schema !== undefined && schema !== null) {
|
||||||
|
schema = sanitizeSchema(schema);
|
||||||
|
}
|
||||||
|
|
||||||
// Convert from ArrowTable to Record<String, Vector>
|
// Convert from ArrowTable to Record<String, Vector>
|
||||||
const colEntries = [...Array(table.numCols).keys()].map((_, idx) => {
|
const colEntries = [...Array(table.numCols).keys()].map((_, idx) => {
|
||||||
const name = table.schema.fields[idx].name;
|
const name = table.schema.fields[idx].name;
|
||||||
@@ -704,7 +650,7 @@ async function applyEmbeddings<T>(
|
|||||||
`When using embedding functions and specifying a schema the schema should include the embedding column but the column ${destColumn} was missing`,
|
`When using embedding functions and specifying a schema the schema should include the embedding column but the column ${destColumn} was missing`,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
return alignTable(newTable, schema as Schema);
|
return alignTable(newTable, schema);
|
||||||
}
|
}
|
||||||
return newTable;
|
return newTable;
|
||||||
}
|
}
|
||||||
@@ -798,7 +744,7 @@ export async function fromRecordsToStreamBuffer(
|
|||||||
export async function fromTableToBuffer(
|
export async function fromTableToBuffer(
|
||||||
table: ArrowTable,
|
table: ArrowTable,
|
||||||
embeddings?: EmbeddingFunctionConfig,
|
embeddings?: EmbeddingFunctionConfig,
|
||||||
schema?: SchemaLike,
|
schema?: Schema,
|
||||||
): Promise<Buffer> {
|
): Promise<Buffer> {
|
||||||
if (schema !== undefined && schema !== null) {
|
if (schema !== undefined && schema !== null) {
|
||||||
schema = sanitizeSchema(schema);
|
schema = sanitizeSchema(schema);
|
||||||
@@ -825,7 +771,7 @@ export async function fromDataToBuffer(
|
|||||||
schema = sanitizeSchema(schema);
|
schema = sanitizeSchema(schema);
|
||||||
}
|
}
|
||||||
if (isArrowTable(data)) {
|
if (isArrowTable(data)) {
|
||||||
return fromTableToBuffer(sanitizeTable(data), embeddings, schema);
|
return fromTableToBuffer(data, embeddings, schema);
|
||||||
} else {
|
} else {
|
||||||
const table = await convertToTable(data, embeddings, { schema });
|
const table = await convertToTable(data, embeddings, { schema });
|
||||||
return fromTableToBuffer(table);
|
return fromTableToBuffer(table);
|
||||||
@@ -843,7 +789,7 @@ export async function fromDataToBuffer(
|
|||||||
export async function fromTableToStreamBuffer(
|
export async function fromTableToStreamBuffer(
|
||||||
table: ArrowTable,
|
table: ArrowTable,
|
||||||
embeddings?: EmbeddingFunctionConfig,
|
embeddings?: EmbeddingFunctionConfig,
|
||||||
schema?: SchemaLike,
|
schema?: Schema,
|
||||||
): Promise<Buffer> {
|
): Promise<Buffer> {
|
||||||
const tableWithEmbeddings = await applyEmbeddings(table, embeddings, schema);
|
const tableWithEmbeddings = await applyEmbeddings(table, embeddings, schema);
|
||||||
const writer = RecordBatchStreamWriter.writeAll(tableWithEmbeddings);
|
const writer = RecordBatchStreamWriter.writeAll(tableWithEmbeddings);
|
||||||
@@ -908,6 +854,7 @@ function validateSchemaEmbeddings(
|
|||||||
for (let field of schema.fields) {
|
for (let field of schema.fields) {
|
||||||
if (isFixedSizeList(field.type)) {
|
if (isFixedSizeList(field.type)) {
|
||||||
field = sanitizeField(field);
|
field = sanitizeField(field);
|
||||||
|
|
||||||
if (data.length !== 0 && data?.[0]?.[field.name] === undefined) {
|
if (data.length !== 0 && data?.[0]?.[field.name] === undefined) {
|
||||||
if (schema.metadata.has("embedding_functions")) {
|
if (schema.metadata.has("embedding_functions")) {
|
||||||
const embeddings = JSON.parse(
|
const embeddings = JSON.parse(
|
||||||
|
|||||||
@@ -12,7 +12,7 @@
|
|||||||
// See the License for the specific language governing permissions and
|
// See the License for the specific language governing permissions and
|
||||||
// limitations under the License.
|
// limitations under the License.
|
||||||
|
|
||||||
import { Data, Schema, SchemaLike, TableLike } from "./arrow";
|
import { Table as ArrowTable, Data, Schema } from "./arrow";
|
||||||
import { fromTableToBuffer, makeEmptyTable } from "./arrow";
|
import { fromTableToBuffer, makeEmptyTable } from "./arrow";
|
||||||
import { EmbeddingFunctionConfig, getRegistry } from "./embedding/registry";
|
import { EmbeddingFunctionConfig, getRegistry } from "./embedding/registry";
|
||||||
import { Connection as LanceDbConnection } from "./native";
|
import { Connection as LanceDbConnection } from "./native";
|
||||||
@@ -50,7 +50,7 @@ export interface CreateTableOptions {
|
|||||||
* The default is true while the new format is in beta
|
* The default is true while the new format is in beta
|
||||||
*/
|
*/
|
||||||
useLegacyFormat?: boolean;
|
useLegacyFormat?: boolean;
|
||||||
schema?: SchemaLike;
|
schema?: Schema;
|
||||||
embeddingFunction?: EmbeddingFunctionConfig;
|
embeddingFunction?: EmbeddingFunctionConfig;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -167,12 +167,12 @@ export abstract class Connection {
|
|||||||
/**
|
/**
|
||||||
* Creates a new Table and initializes it with new data.
|
* Creates a new Table and initializes it with new data.
|
||||||
* @param {string} name - The name of the table.
|
* @param {string} name - The name of the table.
|
||||||
* @param {Record<string, unknown>[] | TableLike} data - Non-empty Array of Records
|
* @param {Record<string, unknown>[] | ArrowTable} data - Non-empty Array of Records
|
||||||
* to be inserted into the table
|
* to be inserted into the table
|
||||||
*/
|
*/
|
||||||
abstract createTable(
|
abstract createTable(
|
||||||
name: string,
|
name: string,
|
||||||
data: Record<string, unknown>[] | TableLike,
|
data: Record<string, unknown>[] | ArrowTable,
|
||||||
options?: Partial<CreateTableOptions>,
|
options?: Partial<CreateTableOptions>,
|
||||||
): Promise<Table>;
|
): Promise<Table>;
|
||||||
|
|
||||||
@@ -183,7 +183,7 @@ export abstract class Connection {
|
|||||||
*/
|
*/
|
||||||
abstract createEmptyTable(
|
abstract createEmptyTable(
|
||||||
name: string,
|
name: string,
|
||||||
schema: import("./arrow").SchemaLike,
|
schema: Schema,
|
||||||
options?: Partial<CreateTableOptions>,
|
options?: Partial<CreateTableOptions>,
|
||||||
): Promise<Table>;
|
): Promise<Table>;
|
||||||
|
|
||||||
@@ -235,7 +235,7 @@ export class LocalConnection extends Connection {
|
|||||||
nameOrOptions:
|
nameOrOptions:
|
||||||
| string
|
| string
|
||||||
| ({ name: string; data: Data } & Partial<CreateTableOptions>),
|
| ({ name: string; data: Data } & Partial<CreateTableOptions>),
|
||||||
data?: Record<string, unknown>[] | TableLike,
|
data?: Record<string, unknown>[] | ArrowTable,
|
||||||
options?: Partial<CreateTableOptions>,
|
options?: Partial<CreateTableOptions>,
|
||||||
): Promise<Table> {
|
): Promise<Table> {
|
||||||
if (typeof nameOrOptions !== "string" && "name" in nameOrOptions) {
|
if (typeof nameOrOptions !== "string" && "name" in nameOrOptions) {
|
||||||
@@ -259,7 +259,7 @@ export class LocalConnection extends Connection {
|
|||||||
|
|
||||||
async createEmptyTable(
|
async createEmptyTable(
|
||||||
name: string,
|
name: string,
|
||||||
schema: import("./arrow").SchemaLike,
|
schema: Schema,
|
||||||
options?: Partial<CreateTableOptions>,
|
options?: Partial<CreateTableOptions>,
|
||||||
): Promise<Table> {
|
): Promise<Table> {
|
||||||
let mode: string = options?.mode ?? "create";
|
let mode: string = options?.mode ?? "create";
|
||||||
|
|||||||
@@ -300,9 +300,7 @@ export class VectorQuery extends QueryBase<NativeVectorQuery, VectorQuery> {
|
|||||||
*
|
*
|
||||||
* By default "l2" is used.
|
* By default "l2" is used.
|
||||||
*/
|
*/
|
||||||
distanceType(
|
distanceType(distanceType: string): VectorQuery {
|
||||||
distanceType: Required<IvfPqOptions>["distanceType"],
|
|
||||||
): VectorQuery {
|
|
||||||
this.inner.distanceType(distanceType);
|
this.inner.distanceType(distanceType);
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,10 +1,5 @@
|
|||||||
import { Schema } from "apache-arrow";
|
import { Schema } from "apache-arrow";
|
||||||
import {
|
import { Data, fromTableToStreamBuffer, makeEmptyTable } from "../arrow";
|
||||||
Data,
|
|
||||||
SchemaLike,
|
|
||||||
fromTableToStreamBuffer,
|
|
||||||
makeEmptyTable,
|
|
||||||
} from "../arrow";
|
|
||||||
import {
|
import {
|
||||||
Connection,
|
Connection,
|
||||||
CreateTableOptions,
|
CreateTableOptions,
|
||||||
@@ -161,7 +156,7 @@ export class RemoteConnection extends Connection {
|
|||||||
|
|
||||||
async createEmptyTable(
|
async createEmptyTable(
|
||||||
name: string,
|
name: string,
|
||||||
schema: SchemaLike,
|
schema: Schema,
|
||||||
options?: Partial<CreateTableOptions> | undefined,
|
options?: Partial<CreateTableOptions> | undefined,
|
||||||
): Promise<Table> {
|
): Promise<Table> {
|
||||||
if (options?.mode) {
|
if (options?.mode) {
|
||||||
|
|||||||
@@ -20,12 +20,10 @@
|
|||||||
// comes from the exact same library instance. This is not always the case
|
// comes from the exact same library instance. This is not always the case
|
||||||
// and so we must sanitize the input to ensure that it is compatible.
|
// and so we must sanitize the input to ensure that it is compatible.
|
||||||
|
|
||||||
import { BufferType, Data } from "apache-arrow";
|
|
||||||
import type { IntBitWidth, TKeys, TimeBitWidth } from "apache-arrow/type";
|
import type { IntBitWidth, TKeys, TimeBitWidth } from "apache-arrow/type";
|
||||||
import {
|
import {
|
||||||
Binary,
|
Binary,
|
||||||
Bool,
|
Bool,
|
||||||
DataLike,
|
|
||||||
DataType,
|
DataType,
|
||||||
DateDay,
|
DateDay,
|
||||||
DateMillisecond,
|
DateMillisecond,
|
||||||
@@ -58,14 +56,9 @@ import {
|
|||||||
Map_,
|
Map_,
|
||||||
Null,
|
Null,
|
||||||
type Precision,
|
type Precision,
|
||||||
RecordBatch,
|
|
||||||
RecordBatchLike,
|
|
||||||
Schema,
|
Schema,
|
||||||
SchemaLike,
|
|
||||||
SparseUnion,
|
SparseUnion,
|
||||||
Struct,
|
Struct,
|
||||||
Table,
|
|
||||||
TableLike,
|
|
||||||
Time,
|
Time,
|
||||||
TimeMicrosecond,
|
TimeMicrosecond,
|
||||||
TimeMillisecond,
|
TimeMillisecond,
|
||||||
@@ -495,7 +488,7 @@ export function sanitizeField(fieldLike: unknown): Field {
|
|||||||
* instance because they might be using a different instance of apache-arrow
|
* instance because they might be using a different instance of apache-arrow
|
||||||
* than lancedb is using.
|
* than lancedb is using.
|
||||||
*/
|
*/
|
||||||
export function sanitizeSchema(schemaLike: SchemaLike): Schema {
|
export function sanitizeSchema(schemaLike: unknown): Schema {
|
||||||
if (schemaLike instanceof Schema) {
|
if (schemaLike instanceof Schema) {
|
||||||
return schemaLike;
|
return schemaLike;
|
||||||
}
|
}
|
||||||
@@ -521,68 +514,3 @@ export function sanitizeSchema(schemaLike: SchemaLike): Schema {
|
|||||||
);
|
);
|
||||||
return new Schema(sanitizedFields, metadata);
|
return new Schema(sanitizedFields, metadata);
|
||||||
}
|
}
|
||||||
|
|
||||||
export function sanitizeTable(tableLike: TableLike): Table {
|
|
||||||
if (tableLike instanceof Table) {
|
|
||||||
return tableLike;
|
|
||||||
}
|
|
||||||
if (typeof tableLike !== "object" || tableLike === null) {
|
|
||||||
throw Error("Expected a Table but object was null/undefined");
|
|
||||||
}
|
|
||||||
if (!("schema" in tableLike)) {
|
|
||||||
throw Error(
|
|
||||||
"The table passed in does not appear to be a table (no 'schema' property)",
|
|
||||||
);
|
|
||||||
}
|
|
||||||
if (!("batches" in tableLike)) {
|
|
||||||
throw Error(
|
|
||||||
"The table passed in does not appear to be a table (no 'columns' property)",
|
|
||||||
);
|
|
||||||
}
|
|
||||||
const schema = sanitizeSchema(tableLike.schema);
|
|
||||||
|
|
||||||
const batches = tableLike.batches.map(sanitizeRecordBatch);
|
|
||||||
return new Table(schema, batches);
|
|
||||||
}
|
|
||||||
|
|
||||||
function sanitizeRecordBatch(batchLike: RecordBatchLike): RecordBatch {
|
|
||||||
if (batchLike instanceof RecordBatch) {
|
|
||||||
return batchLike;
|
|
||||||
}
|
|
||||||
if (typeof batchLike !== "object" || batchLike === null) {
|
|
||||||
throw Error("Expected a RecordBatch but object was null/undefined");
|
|
||||||
}
|
|
||||||
if (!("schema" in batchLike)) {
|
|
||||||
throw Error(
|
|
||||||
"The record batch passed in does not appear to be a record batch (no 'schema' property)",
|
|
||||||
);
|
|
||||||
}
|
|
||||||
if (!("data" in batchLike)) {
|
|
||||||
throw Error(
|
|
||||||
"The record batch passed in does not appear to be a record batch (no 'data' property)",
|
|
||||||
);
|
|
||||||
}
|
|
||||||
const schema = sanitizeSchema(batchLike.schema);
|
|
||||||
const data = sanitizeData(batchLike.data);
|
|
||||||
return new RecordBatch(schema, data);
|
|
||||||
}
|
|
||||||
function sanitizeData(
|
|
||||||
dataLike: DataLike,
|
|
||||||
// biome-ignore lint/suspicious/noExplicitAny: <explanation>
|
|
||||||
): import("apache-arrow").Data<Struct<any>> {
|
|
||||||
if (dataLike instanceof Data) {
|
|
||||||
return dataLike;
|
|
||||||
}
|
|
||||||
return new Data(
|
|
||||||
dataLike.type,
|
|
||||||
dataLike.offset,
|
|
||||||
dataLike.length,
|
|
||||||
dataLike.nullCount,
|
|
||||||
{
|
|
||||||
[BufferType.OFFSET]: dataLike.valueOffsets,
|
|
||||||
[BufferType.DATA]: dataLike.values,
|
|
||||||
[BufferType.VALIDITY]: dataLike.nullBitmap,
|
|
||||||
[BufferType.TYPE]: dataLike.typeIds,
|
|
||||||
},
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -17,7 +17,6 @@ import {
|
|||||||
Data,
|
Data,
|
||||||
IntoVector,
|
IntoVector,
|
||||||
Schema,
|
Schema,
|
||||||
TableLike,
|
|
||||||
fromDataToBuffer,
|
fromDataToBuffer,
|
||||||
fromTableToBuffer,
|
fromTableToBuffer,
|
||||||
fromTableToStreamBuffer,
|
fromTableToStreamBuffer,
|
||||||
@@ -39,8 +38,6 @@ import {
|
|||||||
Table as _NativeTable,
|
Table as _NativeTable,
|
||||||
} from "./native";
|
} from "./native";
|
||||||
import { Query, VectorQuery } from "./query";
|
import { Query, VectorQuery } from "./query";
|
||||||
import { sanitizeTable } from "./sanitize";
|
|
||||||
export { IndexConfig } from "./native";
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Options for adding data to a table.
|
* Options for adding data to a table.
|
||||||
@@ -384,7 +381,8 @@ export abstract class Table {
|
|||||||
abstract indexStats(name: string): Promise<IndexStatistics | undefined>;
|
abstract indexStats(name: string): Promise<IndexStatistics | undefined>;
|
||||||
|
|
||||||
static async parseTableData(
|
static async parseTableData(
|
||||||
data: Record<string, unknown>[] | TableLike,
|
// biome-ignore lint/suspicious/noExplicitAny: <explanation>
|
||||||
|
data: Record<string, unknown>[] | ArrowTable<any>,
|
||||||
options?: Partial<CreateTableOptions>,
|
options?: Partial<CreateTableOptions>,
|
||||||
streaming = false,
|
streaming = false,
|
||||||
) {
|
) {
|
||||||
@@ -397,9 +395,9 @@ export abstract class Table {
|
|||||||
|
|
||||||
let table: ArrowTable;
|
let table: ArrowTable;
|
||||||
if (isArrowTable(data)) {
|
if (isArrowTable(data)) {
|
||||||
table = sanitizeTable(data);
|
table = data;
|
||||||
} else {
|
} else {
|
||||||
table = makeArrowTable(data as Record<string, unknown>[], options);
|
table = makeArrowTable(data, options);
|
||||||
}
|
}
|
||||||
if (streaming) {
|
if (streaming) {
|
||||||
const buf = await fromTableToStreamBuffer(
|
const buf = await fromTableToStreamBuffer(
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-darwin-arm64",
|
"name": "@lancedb/lancedb-darwin-arm64",
|
||||||
"version": "0.6.0",
|
"version": "0.5.2",
|
||||||
"os": ["darwin"],
|
"os": ["darwin"],
|
||||||
"cpu": ["arm64"],
|
"cpu": ["arm64"],
|
||||||
"main": "lancedb.darwin-arm64.node",
|
"main": "lancedb.darwin-arm64.node",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-darwin-x64",
|
"name": "@lancedb/lancedb-darwin-x64",
|
||||||
"version": "0.6.0",
|
"version": "0.5.2",
|
||||||
"os": ["darwin"],
|
"os": ["darwin"],
|
||||||
"cpu": ["x64"],
|
"cpu": ["x64"],
|
||||||
"main": "lancedb.darwin-x64.node",
|
"main": "lancedb.darwin-x64.node",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-linux-arm64-gnu",
|
"name": "@lancedb/lancedb-linux-arm64-gnu",
|
||||||
"version": "0.6.0",
|
"version": "0.5.2",
|
||||||
"os": ["linux"],
|
"os": ["linux"],
|
||||||
"cpu": ["arm64"],
|
"cpu": ["arm64"],
|
||||||
"main": "lancedb.linux-arm64-gnu.node",
|
"main": "lancedb.linux-arm64-gnu.node",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-linux-x64-gnu",
|
"name": "@lancedb/lancedb-linux-x64-gnu",
|
||||||
"version": "0.6.0",
|
"version": "0.5.2",
|
||||||
"os": ["linux"],
|
"os": ["linux"],
|
||||||
"cpu": ["x64"],
|
"cpu": ["x64"],
|
||||||
"main": "lancedb.linux-x64-gnu.node",
|
"main": "lancedb.linux-x64-gnu.node",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-win32-x64-msvc",
|
"name": "@lancedb/lancedb-win32-x64-msvc",
|
||||||
"version": "0.6.0",
|
"version": "0.5.2",
|
||||||
"os": ["win32"],
|
"os": ["win32"],
|
||||||
"cpu": ["x64"],
|
"cpu": ["x64"],
|
||||||
"main": "lancedb.win32-x64-msvc.node",
|
"main": "lancedb.win32-x64-msvc.node",
|
||||||
|
|||||||
@@ -10,7 +10,7 @@
|
|||||||
"vector database",
|
"vector database",
|
||||||
"ann"
|
"ann"
|
||||||
],
|
],
|
||||||
"version": "0.6.0",
|
"version": "0.5.2",
|
||||||
"main": "dist/index.js",
|
"main": "dist/index.js",
|
||||||
"exports": {
|
"exports": {
|
||||||
".": "./dist/index.js",
|
".": "./dist/index.js",
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
[tool.bumpversion]
|
[tool.bumpversion]
|
||||||
current_version = "0.9.0"
|
current_version = "0.8.2"
|
||||||
parse = """(?x)
|
parse = """(?x)
|
||||||
(?P<major>0|[1-9]\\d*)\\.
|
(?P<major>0|[1-9]\\d*)\\.
|
||||||
(?P<minor>0|[1-9]\\d*)\\.
|
(?P<minor>0|[1-9]\\d*)\\.
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "lancedb-python"
|
name = "lancedb-python"
|
||||||
version = "0.9.0"
|
version = "0.8.2"
|
||||||
edition.workspace = true
|
edition.workspace = true
|
||||||
description = "Python bindings for LanceDB"
|
description = "Python bindings for LanceDB"
|
||||||
license.workspace = true
|
license.workspace = true
|
||||||
|
|||||||
@@ -3,7 +3,7 @@ name = "lancedb"
|
|||||||
# version in Cargo.toml
|
# version in Cargo.toml
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"deprecation",
|
"deprecation",
|
||||||
"pylance==0.13.0",
|
"pylance==0.12.2-beta.2",
|
||||||
"ratelimiter~=1.0",
|
"ratelimiter~=1.0",
|
||||||
"requests>=2.31.0",
|
"requests>=2.31.0",
|
||||||
"retry>=0.9.2",
|
"retry>=0.9.2",
|
||||||
|
|||||||
@@ -119,7 +119,9 @@ class Reranker(ABC):
|
|||||||
fts_results : pa.Table
|
fts_results : pa.Table
|
||||||
The results from the FTS search
|
The results from the FTS search
|
||||||
"""
|
"""
|
||||||
combined = pa.concat_tables([vector_results, fts_results], promote=True)
|
combined = pa.concat_tables(
|
||||||
|
[vector_results, fts_results], promote_options="default"
|
||||||
|
)
|
||||||
row_id = combined.column("_rowid")
|
row_id = combined.column("_rowid")
|
||||||
|
|
||||||
# deduplicate
|
# deduplicate
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "lancedb-node"
|
name = "lancedb-node"
|
||||||
version = "0.6.0"
|
version = "0.5.2"
|
||||||
description = "Serverless, low-latency vector database for AI applications"
|
description = "Serverless, low-latency vector database for AI applications"
|
||||||
license.workspace = true
|
license.workspace = true
|
||||||
edition.workspace = true
|
edition.workspace = true
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "lancedb"
|
name = "lancedb"
|
||||||
version = "0.6.0"
|
version = "0.5.2"
|
||||||
edition.workspace = true
|
edition.workspace = true
|
||||||
description = "LanceDB: A serverless, low-latency vector database for AI applications"
|
description = "LanceDB: A serverless, low-latency vector database for AI applications"
|
||||||
license.workspace = true
|
license.workspace = true
|
||||||
|
|||||||
@@ -6,12 +6,3 @@
|
|||||||
LanceDB Rust SDK, a serverless vector database.
|
LanceDB Rust SDK, a serverless vector database.
|
||||||
|
|
||||||
Read more at: https://lancedb.com/
|
Read more at: https://lancedb.com/
|
||||||
|
|
||||||
> [!TIP]
|
|
||||||
> A transitive dependency of `lancedb` is `lzma-sys`, which uses dynamic linking
|
|
||||||
> by default. If you want to statically link `lzma-sys`, you should activate its
|
|
||||||
> `static` feature by adding the following to your dependencies:
|
|
||||||
>
|
|
||||||
> ```toml
|
|
||||||
> lzma-sys = { version = "*", features = ["static"] }
|
|
||||||
> ```
|
|
||||||
|
|||||||
@@ -1889,7 +1889,6 @@ impl TableInternal for NativeTable {
|
|||||||
}
|
}
|
||||||
columns.push(field.name.clone());
|
columns.push(field.name.clone());
|
||||||
}
|
}
|
||||||
|
|
||||||
let index_type = if is_vector {
|
let index_type = if is_vector {
|
||||||
crate::index::IndexType::IvfPq
|
crate::index::IndexType::IvfPq
|
||||||
} else {
|
} else {
|
||||||
|
|||||||
Reference in New Issue
Block a user