mirror of
https://github.com/lancedb/lancedb.git
synced 2025-12-24 05:49:57 +00:00
Compare commits
8 Commits
python-v0.
...
python-v0.
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
a9897d9d85 | ||
|
|
acda7a4589 | ||
|
|
dac0857745 | ||
|
|
0a9e1eab75 | ||
|
|
d999d72c8d | ||
|
|
de4720993e | ||
|
|
6c14a307e2 | ||
|
|
43747278c8 |
@@ -1,5 +1,5 @@
|
||||
[tool.bumpversion]
|
||||
current_version = "0.15.0"
|
||||
current_version = "0.15.1-beta.0"
|
||||
parse = """(?x)
|
||||
(?P<major>0|[1-9]\\d*)\\.
|
||||
(?P<minor>0|[1-9]\\d*)\\.
|
||||
|
||||
16
Cargo.toml
16
Cargo.toml
@@ -23,14 +23,14 @@ rust-version = "1.78.0"
|
||||
[workspace.dependencies]
|
||||
lance = { "version" = "=0.23.0", "features" = [
|
||||
"dynamodb",
|
||||
], git = "https://github.com/lancedb/lance.git", tag = "v0.23.0-beta.2" }
|
||||
lance-io = { version = "=0.23.0", git = "https://github.com/lancedb/lance.git", tag = "v0.23.0-beta.2" }
|
||||
lance-index = { version = "=0.23.0", git = "https://github.com/lancedb/lance.git", tag = "v0.23.0-beta.2" }
|
||||
lance-linalg = { version = "=0.23.0", git = "https://github.com/lancedb/lance.git", tag = "v0.23.0-beta.2" }
|
||||
lance-table = { version = "=0.23.0", git = "https://github.com/lancedb/lance.git", tag = "v0.23.0-beta.2" }
|
||||
lance-testing = { version = "=0.23.0", git = "https://github.com/lancedb/lance.git", tag = "v0.23.0-beta.2" }
|
||||
lance-datafusion = { version = "=0.23.0", git = "https://github.com/lancedb/lance.git", tag = "v0.23.0-beta.2" }
|
||||
lance-encoding = { version = "=0.23.0", git = "https://github.com/lancedb/lance.git", tag = "v0.23.0-beta.2" }
|
||||
], git = "https://github.com/lancedb/lance.git", tag = "v0.23.0-beta.3" }
|
||||
lance-io = { version = "=0.23.0", git = "https://github.com/lancedb/lance.git", tag = "v0.23.0-beta.3" }
|
||||
lance-index = { version = "=0.23.0", git = "https://github.com/lancedb/lance.git", tag = "v0.23.0-beta.3" }
|
||||
lance-linalg = { version = "=0.23.0", git = "https://github.com/lancedb/lance.git", tag = "v0.23.0-beta.3" }
|
||||
lance-table = { version = "=0.23.0", git = "https://github.com/lancedb/lance.git", tag = "v0.23.0-beta.3" }
|
||||
lance-testing = { version = "=0.23.0", git = "https://github.com/lancedb/lance.git", tag = "v0.23.0-beta.3" }
|
||||
lance-datafusion = { version = "=0.23.0", git = "https://github.com/lancedb/lance.git", tag = "v0.23.0-beta.3" }
|
||||
lance-encoding = { version = "=0.23.0", git = "https://github.com/lancedb/lance.git", tag = "v0.23.0-beta.3" }
|
||||
# Note that this one does not include pyarrow
|
||||
arrow = { version = "53.2", optional = false }
|
||||
arrow-array = "53.2"
|
||||
|
||||
@@ -114,14 +114,17 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"data = [\n",
|
||||
" {\"vector\": [1.1, 1.2], \"lat\": 45.5, \"long\": -122.7},\n",
|
||||
" {\"vector\": [0.2, 1.8], \"lat\": 40.1, \"long\": -74.1},\n",
|
||||
"]\n",
|
||||
"import pandas as pd\n",
|
||||
"\n",
|
||||
"db.create_table(\"table2\", data)\n",
|
||||
"\n",
|
||||
"db[\"table2\"].head() "
|
||||
"data = pd.DataFrame(\n",
|
||||
" {\n",
|
||||
" \"vector\": [[1.1, 1.2, 1.3, 1.4], [0.2, 1.8, 0.4, 3.6]],\n",
|
||||
" \"lat\": [45.5, 40.1],\n",
|
||||
" \"long\": [-122.7, -74.1],\n",
|
||||
" }\n",
|
||||
")\n",
|
||||
"db.create_table(\"my_table_pandas\", data)\n",
|
||||
"db[\"my_table_pandas\"].head()"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -164,7 +167,7 @@
|
||||
"import pyarrow as pa\n",
|
||||
"\n",
|
||||
"custom_schema = pa.schema([\n",
|
||||
"pa.field(\"vector\", pa.list_(pa.float32(), 2)),\n",
|
||||
"pa.field(\"vector\", pa.list_(pa.float32(), 4)),\n",
|
||||
"pa.field(\"lat\", pa.float32()),\n",
|
||||
"pa.field(\"long\", pa.float32())\n",
|
||||
"])\n",
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
<parent>
|
||||
<groupId>com.lancedb</groupId>
|
||||
<artifactId>lancedb-parent</artifactId>
|
||||
<version>0.15.0-final.0</version>
|
||||
<version>0.15.1-beta.0</version>
|
||||
<relativePath>../pom.xml</relativePath>
|
||||
</parent>
|
||||
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
|
||||
<groupId>com.lancedb</groupId>
|
||||
<artifactId>lancedb-parent</artifactId>
|
||||
<version>0.15.0-final.0</version>
|
||||
<version>0.15.1-beta.0</version>
|
||||
<packaging>pom</packaging>
|
||||
|
||||
<name>LanceDB Parent</name>
|
||||
|
||||
124
node/package-lock.json
generated
124
node/package-lock.json
generated
@@ -1,12 +1,12 @@
|
||||
{
|
||||
"name": "vectordb",
|
||||
"version": "0.15.0",
|
||||
"version": "0.15.1-beta.0",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "vectordb",
|
||||
"version": "0.15.0",
|
||||
"version": "0.15.1-beta.0",
|
||||
"cpu": [
|
||||
"x64",
|
||||
"arm64"
|
||||
@@ -52,14 +52,14 @@
|
||||
"uuid": "^9.0.0"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"@lancedb/vectordb-darwin-arm64": "0.15.0",
|
||||
"@lancedb/vectordb-darwin-x64": "0.15.0",
|
||||
"@lancedb/vectordb-linux-arm64-gnu": "0.15.0",
|
||||
"@lancedb/vectordb-linux-arm64-musl": "0.15.0",
|
||||
"@lancedb/vectordb-linux-x64-gnu": "0.15.0",
|
||||
"@lancedb/vectordb-linux-x64-musl": "0.15.0",
|
||||
"@lancedb/vectordb-win32-arm64-msvc": "0.15.0",
|
||||
"@lancedb/vectordb-win32-x64-msvc": "0.15.0"
|
||||
"@lancedb/vectordb-darwin-arm64": "0.15.1-beta.0",
|
||||
"@lancedb/vectordb-darwin-x64": "0.15.1-beta.0",
|
||||
"@lancedb/vectordb-linux-arm64-gnu": "0.15.1-beta.0",
|
||||
"@lancedb/vectordb-linux-arm64-musl": "0.15.1-beta.0",
|
||||
"@lancedb/vectordb-linux-x64-gnu": "0.15.1-beta.0",
|
||||
"@lancedb/vectordb-linux-x64-musl": "0.15.1-beta.0",
|
||||
"@lancedb/vectordb-win32-arm64-msvc": "0.15.1-beta.0",
|
||||
"@lancedb/vectordb-win32-x64-msvc": "0.15.1-beta.0"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"@apache-arrow/ts": "^14.0.2",
|
||||
@@ -329,110 +329,6 @@
|
||||
"@jridgewell/sourcemap-codec": "^1.4.10"
|
||||
}
|
||||
},
|
||||
"node_modules/@lancedb/vectordb-darwin-arm64": {
|
||||
"version": "0.15.0",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.15.0.tgz",
|
||||
"integrity": "sha512-FnBRsCrxvecjhkMQus9M9RQpXyhu1jxQjYGDaqqRIfcUd3ew7ahIR4qk9FyALHmjpPd72xJZgNLjliHtsIX4/w==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"license": "Apache-2.0",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"darwin"
|
||||
]
|
||||
},
|
||||
"node_modules/@lancedb/vectordb-darwin-x64": {
|
||||
"version": "0.15.0",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.15.0.tgz",
|
||||
"integrity": "sha512-zy+nt1WBCabVI16u2t3sqGUXBOmnF5ZXMsHa9TWYEXVnbw5112K7/1783DTNA/ZBI/WziUa5jqYQ0GOwkgruqA==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"license": "Apache-2.0",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"darwin"
|
||||
]
|
||||
},
|
||||
"node_modules/@lancedb/vectordb-linux-arm64-gnu": {
|
||||
"version": "0.15.0",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.15.0.tgz",
|
||||
"integrity": "sha512-2Pbw+z5Ij5QBvmBxmjaT5F2lNHftVWlarDM1bDc4JtgodJ3Js729qnVLQ0yehnlt+hM6aGFEyn8bH5vf6gEvpQ==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"license": "Apache-2.0",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
]
|
||||
},
|
||||
"node_modules/@lancedb/vectordb-linux-arm64-musl": {
|
||||
"version": "0.15.0",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-musl/-/vectordb-linux-arm64-musl-0.15.0.tgz",
|
||||
"integrity": "sha512-WIvgd2EY2maCdYNHPC0C9RprjNWL83FkQKtn591xixltFk3XKgvBQ2USZW2tXndH/WVdvFQvystmZ3dgUrh8DQ==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"license": "Apache-2.0",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
]
|
||||
},
|
||||
"node_modules/@lancedb/vectordb-linux-x64-gnu": {
|
||||
"version": "0.15.0",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.15.0.tgz",
|
||||
"integrity": "sha512-Pet3aPE+yQT13Gm0+fh11pgHvImS4X8Uf0zRdzsx0eja7x8j15VrVcZTEVTT4QdBNiZrhXBuiq482NJBsqe6vw==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"license": "Apache-2.0",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
]
|
||||
},
|
||||
"node_modules/@lancedb/vectordb-linux-x64-musl": {
|
||||
"version": "0.15.0",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-musl/-/vectordb-linux-x64-musl-0.15.0.tgz",
|
||||
"integrity": "sha512-BC1RvIoEmyOr7ENp618vs9F05gdN7aKlToJNZnGIoi++hRZ25y39B1xxMXQHDnUL8G+Ur9kJObfQ43nVWqueTQ==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"license": "Apache-2.0",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
]
|
||||
},
|
||||
"node_modules/@lancedb/vectordb-win32-arm64-msvc": {
|
||||
"version": "0.15.0",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-arm64-msvc/-/vectordb-win32-arm64-msvc-0.15.0.tgz",
|
||||
"integrity": "sha512-H9BeryZl1aLxldtVP0XyiQJyzKStkuxS6SmIg+zaANr9Dns+LmVxYCz429JLC0DlvBWoYjTfK9WJTgMSZXr0Cg==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"license": "Apache-2.0",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"win32"
|
||||
]
|
||||
},
|
||||
"node_modules/@lancedb/vectordb-win32-x64-msvc": {
|
||||
"version": "0.15.0",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.15.0.tgz",
|
||||
"integrity": "sha512-J8JICux2M82OR27i/4YAbEPlvszuE7EnGIU5jmm2+RTFaptKOCshH1C4D4jEXDAaHcUkVgsxyc9lGmGJCkGLhg==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"license": "Apache-2.0",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"win32"
|
||||
]
|
||||
},
|
||||
"node_modules/@neon-rs/cli": {
|
||||
"version": "0.0.160",
|
||||
"resolved": "https://registry.npmjs.org/@neon-rs/cli/-/cli-0.0.160.tgz",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "vectordb",
|
||||
"version": "0.15.0",
|
||||
"version": "0.15.1-beta.0",
|
||||
"description": " Serverless, low-latency vector database for AI applications",
|
||||
"private": false,
|
||||
"main": "dist/index.js",
|
||||
@@ -92,13 +92,13 @@
|
||||
}
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"@lancedb/vectordb-darwin-x64": "0.15.0",
|
||||
"@lancedb/vectordb-darwin-arm64": "0.15.0",
|
||||
"@lancedb/vectordb-linux-x64-gnu": "0.15.0",
|
||||
"@lancedb/vectordb-linux-arm64-gnu": "0.15.0",
|
||||
"@lancedb/vectordb-linux-x64-musl": "0.15.0",
|
||||
"@lancedb/vectordb-linux-arm64-musl": "0.15.0",
|
||||
"@lancedb/vectordb-win32-x64-msvc": "0.15.0",
|
||||
"@lancedb/vectordb-win32-arm64-msvc": "0.15.0"
|
||||
"@lancedb/vectordb-darwin-x64": "0.15.1-beta.0",
|
||||
"@lancedb/vectordb-darwin-arm64": "0.15.1-beta.0",
|
||||
"@lancedb/vectordb-linux-x64-gnu": "0.15.1-beta.0",
|
||||
"@lancedb/vectordb-linux-arm64-gnu": "0.15.1-beta.0",
|
||||
"@lancedb/vectordb-linux-x64-musl": "0.15.1-beta.0",
|
||||
"@lancedb/vectordb-linux-arm64-musl": "0.15.1-beta.0",
|
||||
"@lancedb/vectordb-win32-x64-msvc": "0.15.1-beta.0",
|
||||
"@lancedb/vectordb-win32-arm64-msvc": "0.15.1-beta.0"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
[package]
|
||||
name = "lancedb-nodejs"
|
||||
edition.workspace = true
|
||||
version = "0.15.0"
|
||||
version = "0.15.1-beta.0"
|
||||
license.workspace = true
|
||||
description.workspace = true
|
||||
repository.workspace = true
|
||||
|
||||
@@ -83,6 +83,74 @@ describe("embedding functions", () => {
|
||||
expect(vector0).toEqual([1, 2, 3]);
|
||||
});
|
||||
|
||||
it("should be able to append and upsert using embedding function", async () => {
|
||||
@register()
|
||||
class MockEmbeddingFunction extends EmbeddingFunction<string> {
|
||||
toJSON(): object {
|
||||
return {};
|
||||
}
|
||||
ndims() {
|
||||
return 3;
|
||||
}
|
||||
embeddingDataType(): Float {
|
||||
return new Float32();
|
||||
}
|
||||
async computeQueryEmbeddings(_data: string) {
|
||||
return [1, 2, 3];
|
||||
}
|
||||
async computeSourceEmbeddings(data: string[]) {
|
||||
return Array.from({ length: data.length }).fill([
|
||||
1, 2, 3,
|
||||
]) as number[][];
|
||||
}
|
||||
}
|
||||
const func = new MockEmbeddingFunction();
|
||||
const db = await connect(tmpDir.name);
|
||||
const table = await db.createTable(
|
||||
"test",
|
||||
[
|
||||
{ id: 1, text: "hello" },
|
||||
{ id: 2, text: "world" },
|
||||
],
|
||||
{
|
||||
embeddingFunction: {
|
||||
function: func,
|
||||
sourceColumn: "text",
|
||||
},
|
||||
},
|
||||
);
|
||||
|
||||
const schema = await table.schema();
|
||||
expect(schema.metadata.get("embedding_functions")).toBeDefined();
|
||||
|
||||
// Append some new data
|
||||
const data1 = [
|
||||
{ id: 3, text: "forest" },
|
||||
{ id: 4, text: "mountain" },
|
||||
];
|
||||
await table.add(data1);
|
||||
|
||||
// Upsert some data
|
||||
const data2 = [
|
||||
{ id: 5, text: "river" },
|
||||
{ id: 2, text: "canyon" },
|
||||
];
|
||||
await table
|
||||
.mergeInsert("id")
|
||||
.whenMatchedUpdateAll()
|
||||
.whenNotMatchedInsertAll()
|
||||
.execute(data2);
|
||||
|
||||
const rows = await table.query().toArray();
|
||||
rows.sort((a, b) => a.id - b.id);
|
||||
const texts = rows.map((row) => row.text);
|
||||
expect(texts).toEqual(["hello", "canyon", "forest", "mountain", "river"]);
|
||||
const vectorsDefined = rows.map(
|
||||
(row) => row.vector !== undefined && row.vector !== null,
|
||||
);
|
||||
expect(vectorsDefined).toEqual(new Array(5).fill(true));
|
||||
});
|
||||
|
||||
it("should be able to create an empty table with an embedding function", async () => {
|
||||
@register()
|
||||
class MockEmbeddingFunction extends EmbeddingFunction<string> {
|
||||
|
||||
@@ -609,6 +609,14 @@ async function applyEmbeddings<T>(
|
||||
return table;
|
||||
}
|
||||
|
||||
let schemaMetadata = schema?.metadata || new Map<string, string>();
|
||||
|
||||
if (!(embeddings == null || embeddings === undefined)) {
|
||||
const registry = getRegistry();
|
||||
const embeddingMetadata = registry.getTableMetadata([embeddings]);
|
||||
schemaMetadata = new Map([...schemaMetadata, ...embeddingMetadata]);
|
||||
}
|
||||
|
||||
// Convert from ArrowTable to Record<String, Vector>
|
||||
const colEntries = [...Array(table.numCols).keys()].map((_, idx) => {
|
||||
const name = table.schema.fields[idx].name;
|
||||
@@ -677,15 +685,21 @@ async function applyEmbeddings<T>(
|
||||
newColumns[destColumn] = makeVector(vectors, destType);
|
||||
}
|
||||
|
||||
const newTable = new ArrowTable(newColumns);
|
||||
let newTable = new ArrowTable(newColumns);
|
||||
if (schema != null) {
|
||||
if (schema.fields.find((f) => f.name === destColumn) === undefined) {
|
||||
throw new Error(
|
||||
`When using embedding functions and specifying a schema the schema should include the embedding column but the column ${destColumn} was missing`,
|
||||
);
|
||||
}
|
||||
return alignTable(newTable, schema as Schema);
|
||||
newTable = alignTable(newTable, schema as Schema);
|
||||
}
|
||||
|
||||
newTable = new ArrowTable(
|
||||
new Schema(newTable.schema.fields, schemaMetadata),
|
||||
newTable.batches,
|
||||
);
|
||||
|
||||
return newTable;
|
||||
}
|
||||
|
||||
|
||||
@@ -1,13 +1,20 @@
|
||||
import { Data, fromDataToBuffer } from "./arrow";
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||
import { Data, Schema, fromDataToBuffer } from "./arrow";
|
||||
import { NativeMergeInsertBuilder } from "./native";
|
||||
|
||||
/** A builder used to create and run a merge insert operation */
|
||||
export class MergeInsertBuilder {
|
||||
#native: NativeMergeInsertBuilder;
|
||||
#schema: Schema | Promise<Schema>;
|
||||
|
||||
/** Construct a MergeInsertBuilder. __Internal use only.__ */
|
||||
constructor(native: NativeMergeInsertBuilder) {
|
||||
constructor(
|
||||
native: NativeMergeInsertBuilder,
|
||||
schema: Schema | Promise<Schema>,
|
||||
) {
|
||||
this.#native = native;
|
||||
this.#schema = schema;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -35,6 +42,7 @@ export class MergeInsertBuilder {
|
||||
whenMatchedUpdateAll(options?: { where: string }): MergeInsertBuilder {
|
||||
return new MergeInsertBuilder(
|
||||
this.#native.whenMatchedUpdateAll(options?.where),
|
||||
this.#schema,
|
||||
);
|
||||
}
|
||||
/**
|
||||
@@ -42,7 +50,10 @@ export class MergeInsertBuilder {
|
||||
* be inserted into the target table.
|
||||
*/
|
||||
whenNotMatchedInsertAll(): MergeInsertBuilder {
|
||||
return new MergeInsertBuilder(this.#native.whenNotMatchedInsertAll());
|
||||
return new MergeInsertBuilder(
|
||||
this.#native.whenNotMatchedInsertAll(),
|
||||
this.#schema,
|
||||
);
|
||||
}
|
||||
/**
|
||||
* Rows that exist only in the target table (old data) will be
|
||||
@@ -56,6 +67,7 @@ export class MergeInsertBuilder {
|
||||
}): MergeInsertBuilder {
|
||||
return new MergeInsertBuilder(
|
||||
this.#native.whenNotMatchedBySourceDelete(options?.where),
|
||||
this.#schema,
|
||||
);
|
||||
}
|
||||
/**
|
||||
@@ -64,7 +76,14 @@ export class MergeInsertBuilder {
|
||||
* Nothing is returned but the `Table` is updated
|
||||
*/
|
||||
async execute(data: Data): Promise<void> {
|
||||
const buffer = await fromDataToBuffer(data);
|
||||
let schema: Schema;
|
||||
if (this.#schema instanceof Promise) {
|
||||
schema = await this.#schema;
|
||||
this.#schema = schema; // In case of future calls
|
||||
} else {
|
||||
schema = this.#schema;
|
||||
}
|
||||
const buffer = await fromDataToBuffer(data, undefined, schema);
|
||||
await this.#native.execute(buffer);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -520,14 +520,8 @@ export class LocalTable extends Table {
|
||||
async add(data: Data, options?: Partial<AddDataOptions>): Promise<void> {
|
||||
const mode = options?.mode ?? "append";
|
||||
const schema = await this.schema();
|
||||
const registry = getRegistry();
|
||||
const functions = await registry.parseFunctions(schema.metadata);
|
||||
|
||||
const buffer = await fromDataToBuffer(
|
||||
data,
|
||||
functions.values().next().value,
|
||||
schema,
|
||||
);
|
||||
const buffer = await fromDataToBuffer(data, undefined, schema);
|
||||
await this.inner.add(buffer, mode);
|
||||
}
|
||||
|
||||
@@ -733,7 +727,7 @@ export class LocalTable extends Table {
|
||||
}
|
||||
mergeInsert(on: string | string[]): MergeInsertBuilder {
|
||||
on = Array.isArray(on) ? on : [on];
|
||||
return new MergeInsertBuilder(this.inner.mergeInsert(on));
|
||||
return new MergeInsertBuilder(this.inner.mergeInsert(on), this.schema());
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-darwin-arm64",
|
||||
"version": "0.15.0",
|
||||
"version": "0.15.1-beta.0",
|
||||
"os": ["darwin"],
|
||||
"cpu": ["arm64"],
|
||||
"main": "lancedb.darwin-arm64.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-darwin-x64",
|
||||
"version": "0.15.0",
|
||||
"version": "0.15.1-beta.0",
|
||||
"os": ["darwin"],
|
||||
"cpu": ["x64"],
|
||||
"main": "lancedb.darwin-x64.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-arm64-gnu",
|
||||
"version": "0.15.0",
|
||||
"version": "0.15.1-beta.0",
|
||||
"os": ["linux"],
|
||||
"cpu": ["arm64"],
|
||||
"main": "lancedb.linux-arm64-gnu.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-arm64-musl",
|
||||
"version": "0.15.0",
|
||||
"version": "0.15.1-beta.0",
|
||||
"os": ["linux"],
|
||||
"cpu": ["arm64"],
|
||||
"main": "lancedb.linux-arm64-musl.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-x64-gnu",
|
||||
"version": "0.15.0",
|
||||
"version": "0.15.1-beta.0",
|
||||
"os": ["linux"],
|
||||
"cpu": ["x64"],
|
||||
"main": "lancedb.linux-x64-gnu.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-x64-musl",
|
||||
"version": "0.15.0",
|
||||
"version": "0.15.1-beta.0",
|
||||
"os": ["linux"],
|
||||
"cpu": ["x64"],
|
||||
"main": "lancedb.linux-x64-musl.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-win32-arm64-msvc",
|
||||
"version": "0.15.0",
|
||||
"version": "0.15.1-beta.0",
|
||||
"os": [
|
||||
"win32"
|
||||
],
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-win32-x64-msvc",
|
||||
"version": "0.15.0",
|
||||
"version": "0.15.1-beta.0",
|
||||
"os": ["win32"],
|
||||
"cpu": ["x64"],
|
||||
"main": "lancedb.win32-x64-msvc.node",
|
||||
|
||||
4
nodejs/package-lock.json
generated
4
nodejs/package-lock.json
generated
@@ -1,12 +1,12 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb",
|
||||
"version": "0.15.0",
|
||||
"version": "0.15.1-beta.0",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "@lancedb/lancedb",
|
||||
"version": "0.15.0",
|
||||
"version": "0.15.1-beta.0",
|
||||
"cpu": [
|
||||
"x64",
|
||||
"arm64"
|
||||
|
||||
@@ -11,7 +11,7 @@
|
||||
"ann"
|
||||
],
|
||||
"private": false,
|
||||
"version": "0.15.0",
|
||||
"version": "0.15.1-beta.0",
|
||||
"main": "dist/index.js",
|
||||
"exports": {
|
||||
".": "./dist/index.js",
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
[tool.bumpversion]
|
||||
current_version = "0.18.1-beta.1"
|
||||
current_version = "0.18.1-beta.2"
|
||||
parse = """(?x)
|
||||
(?P<major>0|[1-9]\\d*)\\.
|
||||
(?P<minor>0|[1-9]\\d*)\\.
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "lancedb-python"
|
||||
version = "0.18.1-beta.1"
|
||||
version = "0.18.1-beta.2"
|
||||
edition.workspace = true
|
||||
description = "Python bindings for LanceDB"
|
||||
license.workspace = true
|
||||
|
||||
@@ -4,7 +4,7 @@ name = "lancedb"
|
||||
dynamic = ["version"]
|
||||
dependencies = [
|
||||
"deprecation",
|
||||
"pylance==0.23.0b2",
|
||||
"pylance==0.23.0b3",
|
||||
"tqdm>=4.27.0",
|
||||
"pydantic>=1.10",
|
||||
"packaging",
|
||||
|
||||
@@ -505,7 +505,7 @@ class LanceQueryBuilder(ABC):
|
||||
"column": self._vector_column,
|
||||
"q": self._query,
|
||||
"k": self._limit,
|
||||
"metric": self._metric,
|
||||
"metric": self._distance_type,
|
||||
"nprobes": self._nprobes,
|
||||
"refine_factor": self._refine_factor,
|
||||
"use_index": self._use_index,
|
||||
@@ -576,7 +576,7 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
|
||||
>>> db = lancedb.connect("./.lancedb")
|
||||
>>> table = db.create_table("my_table", data=data)
|
||||
>>> (table.search([0.4, 0.4])
|
||||
... .metric("cosine")
|
||||
... .distance_type("cosine")
|
||||
... .where("b < 10")
|
||||
... .select(["b", "vector"])
|
||||
... .limit(2)
|
||||
@@ -596,7 +596,7 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
|
||||
):
|
||||
super().__init__(table)
|
||||
self._query = query
|
||||
self._metric = "L2"
|
||||
self._distance_type = "L2"
|
||||
self._nprobes = 20
|
||||
self._lower_bound = None
|
||||
self._upper_bound = None
|
||||
@@ -610,6 +610,9 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
|
||||
def metric(self, metric: Literal["L2", "cosine", "dot"]) -> LanceVectorQueryBuilder:
|
||||
"""Set the distance metric to use.
|
||||
|
||||
This is an alias for distance_type() and may be deprecated in the future.
|
||||
Please use distance_type() instead.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
metric: "L2" or "cosine" or "dot"
|
||||
@@ -620,7 +623,32 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
|
||||
LanceVectorQueryBuilder
|
||||
The LanceQueryBuilder object.
|
||||
"""
|
||||
self._metric = metric.lower()
|
||||
return self.distance_type(metric)
|
||||
|
||||
def distance_type(
|
||||
self, distance_type: Literal["L2", "cosine", "dot"]
|
||||
) -> "LanceVectorQueryBuilder":
|
||||
"""Set the distance metric to use.
|
||||
|
||||
When performing a vector search we try and find the "nearest" vectors according
|
||||
to some kind of distance metric. This parameter controls which distance metric
|
||||
to use.
|
||||
|
||||
Note: if there is a vector index then the distance type used MUST match the
|
||||
distance type used to train the vector index. If this is not done then the
|
||||
results will be invalid.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
distance_type: "L2" or "cosine" or "dot"
|
||||
The distance metric to use. By default "L2" is used.
|
||||
|
||||
Returns
|
||||
-------
|
||||
LanceVectorQueryBuilder
|
||||
The LanceQueryBuilder object.
|
||||
"""
|
||||
self._distance_type = distance_type.lower()
|
||||
return self
|
||||
|
||||
def nprobes(self, nprobes: int) -> LanceVectorQueryBuilder:
|
||||
@@ -745,7 +773,7 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
|
||||
filter=self._where,
|
||||
prefilter=self._prefilter,
|
||||
k=self._limit,
|
||||
metric=self._metric,
|
||||
metric=self._distance_type,
|
||||
columns=self._columns,
|
||||
nprobes=self._nprobes,
|
||||
lower_bound=self._lower_bound,
|
||||
@@ -1078,7 +1106,7 @@ class LanceHybridQueryBuilder(LanceQueryBuilder):
|
||||
self._reranker = RRFReranker()
|
||||
self._nprobes = None
|
||||
self._refine_factor = None
|
||||
self._metric = None
|
||||
self._distance_type = None
|
||||
self._phrase_query = False
|
||||
|
||||
def _validate_query(self, query, vector=None, text=None):
|
||||
@@ -1146,8 +1174,8 @@ class LanceHybridQueryBuilder(LanceQueryBuilder):
|
||||
self._fts_query.with_row_id(True)
|
||||
if self._phrase_query:
|
||||
self._fts_query.phrase_query(True)
|
||||
if self._metric:
|
||||
self._vector_query.metric(self._metric)
|
||||
if self._distance_type:
|
||||
self._vector_query.metric(self._distance_type)
|
||||
if self._nprobes:
|
||||
self._vector_query.nprobes(self._nprobes)
|
||||
if self._refine_factor:
|
||||
@@ -1386,6 +1414,9 @@ class LanceHybridQueryBuilder(LanceQueryBuilder):
|
||||
def metric(self, metric: Literal["L2", "cosine", "dot"]) -> LanceHybridQueryBuilder:
|
||||
"""Set the distance metric to use.
|
||||
|
||||
This is an alias for distance_type() and may be deprecated in the future.
|
||||
Please use distance_type() instead.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
metric: "L2" or "cosine" or "dot"
|
||||
@@ -1396,7 +1427,32 @@ class LanceHybridQueryBuilder(LanceQueryBuilder):
|
||||
LanceVectorQueryBuilder
|
||||
The LanceQueryBuilder object.
|
||||
"""
|
||||
self._metric = metric.lower()
|
||||
return self.distance_type(metric)
|
||||
|
||||
def distance_type(
|
||||
self, distance_type: Literal["L2", "cosine", "dot"]
|
||||
) -> "LanceHybridQueryBuilder":
|
||||
"""Set the distance metric to use.
|
||||
|
||||
When performing a vector search we try and find the "nearest" vectors according
|
||||
to some kind of distance metric. This parameter controls which distance metric
|
||||
to use.
|
||||
|
||||
Note: if there is a vector index then the distance type used MUST match the
|
||||
distance type used to train the vector index. If this is not done then the
|
||||
results will be invalid.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
distance_type: "L2" or "cosine" or "dot"
|
||||
The distance metric to use. By default "L2" is used.
|
||||
|
||||
Returns
|
||||
-------
|
||||
LanceVectorQueryBuilder
|
||||
The LanceQueryBuilder object.
|
||||
"""
|
||||
self._distance_type = distance_type.lower()
|
||||
return self
|
||||
|
||||
def refine_factor(self, refine_factor: int) -> LanceHybridQueryBuilder:
|
||||
|
||||
@@ -38,7 +38,7 @@ def test_binary_vector():
|
||||
|
||||
query = np.random.randint(0, 2, size=256)
|
||||
packed_query = np.packbits(query)
|
||||
tbl.search(packed_query).metric("hamming").to_arrow()
|
||||
tbl.search(packed_query).distance_type("hamming").to_arrow()
|
||||
# --8<-- [end:sync_binary_vector]
|
||||
db.drop_table("my_binary_vectors")
|
||||
|
||||
|
||||
@@ -65,7 +65,7 @@ def test_vector_search():
|
||||
tbl.search(np.random.random((1536))).limit(10).to_list()
|
||||
# --8<-- [end:exhaustive_search]
|
||||
# --8<-- [start:exhaustive_search_cosine]
|
||||
tbl.search(np.random.random((1536))).metric("cosine").limit(10).to_list()
|
||||
tbl.search(np.random.random((1536))).distance_type("cosine").limit(10).to_list()
|
||||
# --8<-- [end:exhaustive_search_cosine]
|
||||
# --8<-- [start:create_table_with_nested_schema]
|
||||
# Let's add 100 sample rows to our dataset
|
||||
|
||||
@@ -377,14 +377,14 @@ def test_query_builder_with_metric(table):
|
||||
df_default = LanceVectorQueryBuilder(table, query, vector_column_name).to_pandas()
|
||||
df_l2 = (
|
||||
LanceVectorQueryBuilder(table, query, vector_column_name)
|
||||
.metric("L2")
|
||||
.distance_type("L2")
|
||||
.to_pandas()
|
||||
)
|
||||
tm.assert_frame_equal(df_default, df_l2)
|
||||
|
||||
df_cosine = (
|
||||
LanceVectorQueryBuilder(table, query, vector_column_name)
|
||||
.metric("cosine")
|
||||
.distance_type("cosine")
|
||||
.limit(1)
|
||||
.to_pandas()
|
||||
)
|
||||
@@ -401,7 +401,7 @@ def test_query_builder_with_different_vector_column():
|
||||
vector_column_name = "foo_vector"
|
||||
builder = (
|
||||
LanceVectorQueryBuilder(table, query, vector_column_name)
|
||||
.metric("cosine")
|
||||
.distance_type("cosine")
|
||||
.where("b < 10")
|
||||
.select(["b"])
|
||||
.limit(2)
|
||||
|
||||
@@ -366,7 +366,7 @@ def test_query_sync_maximal():
|
||||
with query_test_table(handler) as table:
|
||||
(
|
||||
table.search([1, 2, 3], vector_column_name="vector2", fast_search=True)
|
||||
.metric("cosine")
|
||||
.distance_type("cosine")
|
||||
.limit(42)
|
||||
.offset(10)
|
||||
.refine_factor(10)
|
||||
|
||||
@@ -1242,7 +1242,9 @@ def test_hybrid_search_metric_type(tmp_db: DBConnection):
|
||||
|
||||
# with custom metric
|
||||
result_dot = (
|
||||
table.search("feeling lucky", query_type="hybrid").metric("dot").to_arrow()
|
||||
table.search("feeling lucky", query_type="hybrid")
|
||||
.distance_type("dot")
|
||||
.to_arrow()
|
||||
)
|
||||
result_l2 = table.search("feeling lucky", query_type="hybrid").to_arrow()
|
||||
assert len(result_dot) > 0
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "lancedb-node"
|
||||
version = "0.15.0"
|
||||
version = "0.15.1-beta.0"
|
||||
description = "Serverless, low-latency vector database for AI applications"
|
||||
license.workspace = true
|
||||
edition.workspace = true
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "lancedb"
|
||||
version = "0.15.0"
|
||||
version = "0.15.1-beta.0"
|
||||
edition.workspace = true
|
||||
description = "LanceDB: A serverless, low-latency vector database for AI applications"
|
||||
license.workspace = true
|
||||
|
||||
Reference in New Issue
Block a user