mirror of
https://github.com/lancedb/lancedb.git
synced 2025-12-25 22:29:58 +00:00
Compare commits
1 Commits
rmeng/0124
...
change_to_
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
bcbc4541e7 |
@@ -46,7 +46,6 @@ runs:
|
||||
with:
|
||||
command: build
|
||||
working-directory: python
|
||||
docker-options: "-e PIP_EXTRA_INDEX_URL=https://pypi.fury.io/lancedb/"
|
||||
target: aarch64-unknown-linux-gnu
|
||||
manylinux: ${{ inputs.manylinux }}
|
||||
args: ${{ inputs.args }}
|
||||
|
||||
1
.github/workflows/build_mac_wheel/action.yml
vendored
1
.github/workflows/build_mac_wheel/action.yml
vendored
@@ -21,6 +21,5 @@ runs:
|
||||
with:
|
||||
command: build
|
||||
args: ${{ inputs.args }}
|
||||
docker-options: "-e PIP_EXTRA_INDEX_URL=https://pypi.fury.io/lancedb/"
|
||||
working-directory: python
|
||||
interpreter: 3.${{ inputs.python-minor-version }}
|
||||
|
||||
@@ -26,7 +26,6 @@ runs:
|
||||
with:
|
||||
command: build
|
||||
args: ${{ inputs.args }}
|
||||
docker-options: "-e PIP_EXTRA_INDEX_URL=https://pypi.fury.io/lancedb/"
|
||||
working-directory: python
|
||||
- uses: actions/upload-artifact@v3
|
||||
with:
|
||||
|
||||
4
.github/workflows/python.yml
vendored
4
.github/workflows/python.yml
vendored
@@ -65,7 +65,7 @@ jobs:
|
||||
workspaces: python
|
||||
- name: Install
|
||||
run: |
|
||||
pip install --extra-index-url https://pypi.fury.io/lancedb/ -e .[tests,dev,embeddings]
|
||||
pip install -e .[tests,dev,embeddings]
|
||||
pip install tantivy
|
||||
pip install mlx
|
||||
- name: Doctest
|
||||
@@ -189,7 +189,7 @@ jobs:
|
||||
- name: Install lancedb
|
||||
run: |
|
||||
pip install "pydantic<2"
|
||||
pip install --extra-index-url https://pypi.fury.io/lancedb/ -e .[tests]
|
||||
pip install -e .[tests]
|
||||
pip install tantivy
|
||||
- name: Run tests
|
||||
run: pytest -m "not slow and not s3_test" -x -v --durations=30 python/tests
|
||||
|
||||
2
.github/workflows/run_tests/action.yml
vendored
2
.github/workflows/run_tests/action.yml
vendored
@@ -15,7 +15,7 @@ runs:
|
||||
- name: Install lancedb
|
||||
shell: bash
|
||||
run: |
|
||||
pip3 install --extra-index-url https://pypi.fury.io/lancedb/ $(ls target/wheels/lancedb-*.whl)[tests,dev]
|
||||
pip3 install $(ls target/wheels/lancedb-*.whl)[tests,dev]
|
||||
- name: Setup localstack for integration tests
|
||||
if: ${{ inputs.integration == 'true' }}
|
||||
shell: bash
|
||||
|
||||
@@ -14,7 +14,7 @@ repos:
|
||||
hooks:
|
||||
- id: local-biome-check
|
||||
name: biome check
|
||||
entry: npx @biomejs/biome@1.7.3 check --config-path nodejs/biome.json nodejs/
|
||||
entry: npx @biomejs/biome check --config-path nodejs/biome.json nodejs/
|
||||
language: system
|
||||
types: [text]
|
||||
files: "nodejs/.*"
|
||||
|
||||
12
Cargo.toml
12
Cargo.toml
@@ -20,13 +20,11 @@ keywords = ["lancedb", "lance", "database", "vector", "search"]
|
||||
categories = ["database-implementations"]
|
||||
|
||||
[workspace.dependencies]
|
||||
lance = { "version" = "=0.12.4", "features" = [
|
||||
"dynamodb",
|
||||
]}
|
||||
lance-index = { "version" = "=0.12.4" }
|
||||
lance-linalg = { "version" = "=0.12.4" }
|
||||
lance-testing = { "version" = "=0.12.4" }
|
||||
lance-datafusion = { "version" = "=0.12.4" }
|
||||
lance = { "version" = "=0.12.1", "features" = ["dynamodb"] }
|
||||
lance-index = { "version" = "=0.12.1" }
|
||||
lance-linalg = { "version" = "=0.12.1" }
|
||||
lance-testing = { "version" = "=0.12.1" }
|
||||
lance-datafusion = { "version" = "=0.12.1" }
|
||||
# Note that this one does not include pyarrow
|
||||
arrow = { version = "51.0", optional = false }
|
||||
arrow-array = "51.0"
|
||||
|
||||
@@ -132,140 +132,6 @@ describe.each([arrow, arrowOld])("Given a table", (arrow: any) => {
|
||||
});
|
||||
});
|
||||
|
||||
describe("merge insert", () => {
|
||||
let tmpDir: tmp.DirResult;
|
||||
let table: Table;
|
||||
|
||||
beforeEach(async () => {
|
||||
tmpDir = tmp.dirSync({ unsafeCleanup: true });
|
||||
const conn = await connect(tmpDir.name);
|
||||
|
||||
table = await conn.createTable("some_table", [
|
||||
{ a: 1, b: "a" },
|
||||
{ a: 2, b: "b" },
|
||||
{ a: 3, b: "c" },
|
||||
]);
|
||||
});
|
||||
afterEach(() => tmpDir.removeCallback());
|
||||
|
||||
test("upsert", async () => {
|
||||
const newData = [
|
||||
{ a: 2, b: "x" },
|
||||
{ a: 3, b: "y" },
|
||||
{ a: 4, b: "z" },
|
||||
];
|
||||
await table
|
||||
.mergeInsert("a")
|
||||
.whenMatchedUpdateAll()
|
||||
.whenNotMatchedInsertAll()
|
||||
.execute(newData);
|
||||
const expected = [
|
||||
{ a: 1, b: "a" },
|
||||
{ a: 2, b: "x" },
|
||||
{ a: 3, b: "y" },
|
||||
{ a: 4, b: "z" },
|
||||
];
|
||||
|
||||
expect(
|
||||
JSON.parse(JSON.stringify((await table.toArrow()).toArray())),
|
||||
).toEqual(expected);
|
||||
});
|
||||
test("conditional update", async () => {
|
||||
const newData = [
|
||||
{ a: 2, b: "x" },
|
||||
{ a: 3, b: "y" },
|
||||
{ a: 4, b: "z" },
|
||||
];
|
||||
await table
|
||||
.mergeInsert("a")
|
||||
.whenMatchedUpdateAll({ where: "target.b = 'b'" })
|
||||
.execute(newData);
|
||||
|
||||
const expected = [
|
||||
{ a: 1, b: "a" },
|
||||
{ a: 2, b: "x" },
|
||||
{ a: 3, b: "c" },
|
||||
];
|
||||
// round trip to arrow and back to json to avoid comparing arrow objects to js object
|
||||
// biome-ignore lint/suspicious/noExplicitAny: test
|
||||
let res: any[] = JSON.parse(
|
||||
JSON.stringify((await table.toArrow()).toArray()),
|
||||
);
|
||||
res = res.sort((a, b) => a.a - b.a);
|
||||
|
||||
expect(res).toEqual(expected);
|
||||
});
|
||||
|
||||
test("insert if not exists", async () => {
|
||||
const newData = [
|
||||
{ a: 2, b: "x" },
|
||||
{ a: 3, b: "y" },
|
||||
{ a: 4, b: "z" },
|
||||
];
|
||||
await table.mergeInsert("a").whenNotMatchedInsertAll().execute(newData);
|
||||
const expected = [
|
||||
{ a: 1, b: "a" },
|
||||
{ a: 2, b: "b" },
|
||||
{ a: 3, b: "c" },
|
||||
{ a: 4, b: "z" },
|
||||
];
|
||||
// biome-ignore lint/suspicious/noExplicitAny: <explanation>
|
||||
let res: any[] = JSON.parse(
|
||||
JSON.stringify((await table.toArrow()).toArray()),
|
||||
);
|
||||
res = res.sort((a, b) => a.a - b.a);
|
||||
expect(res).toEqual(expected);
|
||||
});
|
||||
test("replace range", async () => {
|
||||
const newData = [
|
||||
{ a: 2, b: "x" },
|
||||
{ a: 4, b: "z" },
|
||||
];
|
||||
await table
|
||||
.mergeInsert("a")
|
||||
.whenMatchedUpdateAll()
|
||||
.whenNotMatchedInsertAll()
|
||||
.whenNotMatchedBySourceDelete({ where: "a > 2" })
|
||||
.execute(newData);
|
||||
|
||||
const expected = [
|
||||
{ a: 1, b: "a" },
|
||||
{ a: 2, b: "x" },
|
||||
{ a: 4, b: "z" },
|
||||
];
|
||||
// biome-ignore lint/suspicious/noExplicitAny: <explanation>
|
||||
let res: any[] = JSON.parse(
|
||||
JSON.stringify((await table.toArrow()).toArray()),
|
||||
);
|
||||
res = res.sort((a, b) => a.a - b.a);
|
||||
expect(res).toEqual(expected);
|
||||
});
|
||||
test("replace range no condition", async () => {
|
||||
const newData = [
|
||||
{ a: 2, b: "x" },
|
||||
{ a: 4, b: "z" },
|
||||
];
|
||||
await table
|
||||
.mergeInsert("a")
|
||||
.whenMatchedUpdateAll()
|
||||
.whenNotMatchedInsertAll()
|
||||
.whenNotMatchedBySourceDelete()
|
||||
.execute(newData);
|
||||
|
||||
const expected = [
|
||||
{ a: 2, b: "x" },
|
||||
{ a: 4, b: "z" },
|
||||
];
|
||||
|
||||
// biome-ignore lint/suspicious/noExplicitAny: test
|
||||
let res: any[] = JSON.parse(
|
||||
JSON.stringify((await table.toArrow()).toArray()),
|
||||
);
|
||||
res = res.sort((a, b) => a.a - b.a);
|
||||
expect(res).toEqual(expected);
|
||||
});
|
||||
});
|
||||
|
||||
describe("When creating an index", () => {
|
||||
let tmpDir: tmp.DirResult;
|
||||
const schema = new Schema([
|
||||
|
||||
@@ -77,7 +77,7 @@
|
||||
"noDuplicateObjectKeys": "error",
|
||||
"noDuplicateParameters": "error",
|
||||
"noEmptyBlockStatements": "error",
|
||||
"noExplicitAny": "warn",
|
||||
"noExplicitAny": "error",
|
||||
"noExtraNonNullAssertion": "error",
|
||||
"noFallthroughSwitchClause": "error",
|
||||
"noFunctionAssign": "error",
|
||||
|
||||
@@ -13,10 +13,37 @@
|
||||
// limitations under the License.
|
||||
|
||||
import { Table as ArrowTable, Schema } from "./arrow";
|
||||
import { fromTableToBuffer, makeEmptyTable } from "./arrow";
|
||||
import {
|
||||
fromTableToBuffer,
|
||||
isArrowTable,
|
||||
makeArrowTable,
|
||||
makeEmptyTable,
|
||||
} from "./arrow";
|
||||
import { EmbeddingFunctionConfig, getRegistry } from "./embedding/registry";
|
||||
import { Connection as LanceDbConnection } from "./native";
|
||||
import { LocalTable, Table } from "./table";
|
||||
import { ConnectionOptions, Connection as LanceDbConnection } from "./native";
|
||||
import { Table } from "./table";
|
||||
|
||||
/**
|
||||
* Connect to a LanceDB instance at the given URI.
|
||||
*
|
||||
* Accepted formats:
|
||||
*
|
||||
* - `/path/to/database` - local database
|
||||
* - `s3://bucket/path/to/database` or `gs://bucket/path/to/database` - database on cloud storage
|
||||
* - `db://host:port` - remote database (LanceDB cloud)
|
||||
* @param {string} uri - The uri of the database. If the database uri starts
|
||||
* with `db://` then it connects to a remote database.
|
||||
* @see {@link ConnectionOptions} for more details on the URI format.
|
||||
*/
|
||||
export async function connect(
|
||||
uri: string,
|
||||
opts?: Partial<ConnectionOptions>,
|
||||
): Promise<Connection> {
|
||||
opts = opts ?? {};
|
||||
opts.storageOptions = cleanseStorageOptions(opts.storageOptions);
|
||||
const nativeConn = await LanceDbConnection.new(uri, opts);
|
||||
return new Connection(nativeConn);
|
||||
}
|
||||
|
||||
export interface CreateTableOptions {
|
||||
/**
|
||||
@@ -90,6 +117,7 @@ export interface TableNamesOptions {
|
||||
/** An optional limit to the number of results to return. */
|
||||
limit?: number;
|
||||
}
|
||||
|
||||
/**
|
||||
* A LanceDB Connection that allows you to open tables and create new ones.
|
||||
*
|
||||
@@ -108,15 +136,17 @@ export interface TableNamesOptions {
|
||||
* Any created tables are independent and will continue to work even if
|
||||
* the underlying connection has been closed.
|
||||
*/
|
||||
export abstract class Connection {
|
||||
[Symbol.for("nodejs.util.inspect.custom")](): string {
|
||||
return this.display();
|
||||
export class Connection {
|
||||
readonly inner: LanceDbConnection;
|
||||
|
||||
constructor(inner: LanceDbConnection) {
|
||||
this.inner = inner;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return true if the connection has not been closed
|
||||
*/
|
||||
abstract isOpen(): boolean;
|
||||
/** Return true if the connection has not been closed */
|
||||
isOpen(): boolean {
|
||||
return this.inner.isOpen();
|
||||
}
|
||||
|
||||
/**
|
||||
* Close the connection, releasing any underlying resources.
|
||||
@@ -125,12 +155,14 @@ export abstract class Connection {
|
||||
*
|
||||
* Any attempt to use the connection after it is closed will result in an error.
|
||||
*/
|
||||
abstract close(): void;
|
||||
close(): void {
|
||||
this.inner.close();
|
||||
}
|
||||
|
||||
/**
|
||||
* Return a brief description of the connection
|
||||
*/
|
||||
abstract display(): string;
|
||||
/** Return a brief description of the connection */
|
||||
display(): string {
|
||||
return this.inner.display();
|
||||
}
|
||||
|
||||
/**
|
||||
* List all the table names in this database.
|
||||
@@ -138,73 +170,15 @@ export abstract class Connection {
|
||||
* Tables will be returned in lexicographical order.
|
||||
* @param {Partial<TableNamesOptions>} options - options to control the
|
||||
* paging / start point
|
||||
*
|
||||
*/
|
||||
abstract tableNames(options?: Partial<TableNamesOptions>): Promise<string[]>;
|
||||
async tableNames(options?: Partial<TableNamesOptions>): Promise<string[]> {
|
||||
return this.inner.tableNames(options?.startAfter, options?.limit);
|
||||
}
|
||||
|
||||
/**
|
||||
* Open a table in the database.
|
||||
* @param {string} name - The name of the table
|
||||
*/
|
||||
abstract openTable(
|
||||
name: string,
|
||||
options?: Partial<OpenTableOptions>,
|
||||
): Promise<Table>;
|
||||
|
||||
/**
|
||||
* Creates a new Table and initialize it with new data.
|
||||
* @param {string} name - The name of the table.
|
||||
* @param {Record<string, unknown>[] | ArrowTable} data - Non-empty Array of Records
|
||||
* to be inserted into the table
|
||||
*/
|
||||
abstract createTable(
|
||||
name: string,
|
||||
data: Record<string, unknown>[] | ArrowTable,
|
||||
options?: Partial<CreateTableOptions>,
|
||||
): Promise<Table>;
|
||||
|
||||
/**
|
||||
* Creates a new empty Table
|
||||
* @param {string} name - The name of the table.
|
||||
* @param {Schema} schema - The schema of the table
|
||||
*/
|
||||
abstract createEmptyTable(
|
||||
name: string,
|
||||
schema: Schema,
|
||||
options?: Partial<CreateTableOptions>,
|
||||
): Promise<Table>;
|
||||
|
||||
/**
|
||||
* Drop an existing table.
|
||||
* @param {string} name The name of the table to drop.
|
||||
*/
|
||||
abstract dropTable(name: string): Promise<void>;
|
||||
}
|
||||
|
||||
export class LocalConnection extends Connection {
|
||||
readonly inner: LanceDbConnection;
|
||||
|
||||
constructor(inner: LanceDbConnection) {
|
||||
super();
|
||||
this.inner = inner;
|
||||
}
|
||||
|
||||
isOpen(): boolean {
|
||||
return this.inner.isOpen();
|
||||
}
|
||||
|
||||
close(): void {
|
||||
this.inner.close();
|
||||
}
|
||||
|
||||
display(): string {
|
||||
return this.inner.display();
|
||||
}
|
||||
|
||||
async tableNames(options?: Partial<TableNamesOptions>): Promise<string[]> {
|
||||
return this.inner.tableNames(options?.startAfter, options?.limit);
|
||||
}
|
||||
|
||||
async openTable(
|
||||
name: string,
|
||||
options?: Partial<OpenTableOptions>,
|
||||
@@ -215,15 +189,39 @@ export class LocalConnection extends Connection {
|
||||
options?.indexCacheSize,
|
||||
);
|
||||
|
||||
return new LocalTable(innerTable);
|
||||
return new Table(innerTable);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a new Table and initialize it with new data.
|
||||
* @param {string} name - The name of the table.
|
||||
* @param {Record<string, unknown>[] | ArrowTable} data - Non-empty Array of Records
|
||||
* to be inserted into the table
|
||||
*/
|
||||
async createTable(
|
||||
name: string,
|
||||
data: Record<string, unknown>[] | ArrowTable,
|
||||
options?: Partial<CreateTableOptions>,
|
||||
): Promise<Table> {
|
||||
const { buf, mode } = await Table.parseTableData(data, options);
|
||||
let mode: string = options?.mode ?? "create";
|
||||
const existOk = options?.existOk ?? false;
|
||||
|
||||
if (mode === "create" && existOk) {
|
||||
mode = "exist_ok";
|
||||
}
|
||||
|
||||
let table: ArrowTable;
|
||||
if (isArrowTable(data)) {
|
||||
table = data;
|
||||
} else {
|
||||
table = makeArrowTable(data, options);
|
||||
}
|
||||
|
||||
const buf = await fromTableToBuffer(
|
||||
table,
|
||||
options?.embeddingFunction,
|
||||
options?.schema,
|
||||
);
|
||||
const innerTable = await this.inner.createTable(
|
||||
name,
|
||||
buf,
|
||||
@@ -232,9 +230,14 @@ export class LocalConnection extends Connection {
|
||||
options?.useLegacyFormat,
|
||||
);
|
||||
|
||||
return new LocalTable(innerTable);
|
||||
return new Table(innerTable);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a new empty Table
|
||||
* @param {string} name - The name of the table.
|
||||
* @param {Schema} schema - The schema of the table
|
||||
*/
|
||||
async createEmptyTable(
|
||||
name: string,
|
||||
schema: Schema,
|
||||
@@ -262,9 +265,13 @@ export class LocalConnection extends Connection {
|
||||
cleanseStorageOptions(options?.storageOptions),
|
||||
options?.useLegacyFormat,
|
||||
);
|
||||
return new LocalTable(innerTable);
|
||||
return new Table(innerTable);
|
||||
}
|
||||
|
||||
/**
|
||||
* Drop an existing table.
|
||||
* @param {string} name The name of the table to drop.
|
||||
*/
|
||||
async dropTable(name: string): Promise<void> {
|
||||
return this.inner.dropTable(name);
|
||||
}
|
||||
@@ -273,7 +280,7 @@ export class LocalConnection extends Connection {
|
||||
/**
|
||||
* Takes storage options and makes all the keys snake case.
|
||||
*/
|
||||
export function cleanseStorageOptions(
|
||||
function cleanseStorageOptions(
|
||||
options?: Record<string, string>,
|
||||
): Record<string, string> | undefined {
|
||||
if (options === undefined) {
|
||||
|
||||
@@ -12,19 +12,6 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
import {
|
||||
Connection,
|
||||
LocalConnection,
|
||||
cleanseStorageOptions,
|
||||
} from "./connection";
|
||||
|
||||
import {
|
||||
ConnectionOptions,
|
||||
Connection as LanceDbConnection,
|
||||
} from "./native.js";
|
||||
|
||||
import { RemoteConnection, RemoteConnectionOptions } from "./remote";
|
||||
|
||||
export {
|
||||
WriteOptions,
|
||||
WriteMode,
|
||||
@@ -32,20 +19,18 @@ export {
|
||||
ColumnAlteration,
|
||||
ConnectionOptions,
|
||||
} from "./native.js";
|
||||
|
||||
export {
|
||||
makeArrowTable,
|
||||
MakeArrowTableOptions,
|
||||
Data,
|
||||
VectorColumnOptions,
|
||||
} from "./arrow";
|
||||
|
||||
export {
|
||||
connect,
|
||||
Connection,
|
||||
CreateTableOptions,
|
||||
TableNamesOptions,
|
||||
} from "./connection";
|
||||
|
||||
export {
|
||||
ExecutableQuery,
|
||||
Query,
|
||||
@@ -53,46 +38,6 @@ export {
|
||||
VectorQuery,
|
||||
RecordBatchIterator,
|
||||
} from "./query";
|
||||
|
||||
export { Index, IndexOptions, IvfPqOptions } from "./indices";
|
||||
|
||||
export {
|
||||
Table,
|
||||
AddDataOptions,
|
||||
IndexConfig,
|
||||
UpdateOptions,
|
||||
} from "./table";
|
||||
|
||||
export { Table, AddDataOptions, IndexConfig, UpdateOptions } from "./table";
|
||||
export * as embedding from "./embedding";
|
||||
|
||||
/**
|
||||
* Connect to a LanceDB instance at the given URI.
|
||||
*
|
||||
* Accepted formats:
|
||||
*
|
||||
* - `/path/to/database` - local database
|
||||
* - `s3://bucket/path/to/database` or `gs://bucket/path/to/database` - database on cloud storage
|
||||
* - `db://host:port` - remote database (LanceDB cloud)
|
||||
* @param {string} uri - The uri of the database. If the database uri starts
|
||||
* with `db://` then it connects to a remote database.
|
||||
* @see {@link ConnectionOptions} for more details on the URI format.
|
||||
*/
|
||||
export async function connect(
|
||||
uri: string,
|
||||
opts?: Partial<ConnectionOptions | RemoteConnectionOptions>,
|
||||
): Promise<Connection> {
|
||||
if (!uri) {
|
||||
throw new Error("uri is required");
|
||||
}
|
||||
opts = opts ?? {};
|
||||
|
||||
if (uri?.startsWith("db://")) {
|
||||
return new RemoteConnection(uri, opts as RemoteConnectionOptions);
|
||||
}
|
||||
opts = (opts as ConnectionOptions) ?? {};
|
||||
(<ConnectionOptions>opts).storageOptions = cleanseStorageOptions(
|
||||
(<ConnectionOptions>opts).storageOptions,
|
||||
);
|
||||
const nativeConn = await LanceDbConnection.new(uri, opts);
|
||||
return new LocalConnection(nativeConn);
|
||||
}
|
||||
|
||||
@@ -1,70 +0,0 @@
|
||||
import { Data, fromDataToBuffer } from "./arrow";
|
||||
import { NativeMergeInsertBuilder } from "./native";
|
||||
|
||||
/** A builder used to create and run a merge insert operation */
|
||||
export class MergeInsertBuilder {
|
||||
#native: NativeMergeInsertBuilder;
|
||||
|
||||
/** Construct a MergeInsertBuilder. __Internal use only.__ */
|
||||
constructor(native: NativeMergeInsertBuilder) {
|
||||
this.#native = native;
|
||||
}
|
||||
|
||||
/**
|
||||
* Rows that exist in both the source table (new data) and
|
||||
* the target table (old data) will be updated, replacing
|
||||
* the old row with the corresponding matching row.
|
||||
*
|
||||
* If there are multiple matches then the behavior is undefined.
|
||||
* Currently this causes multiple copies of the row to be created
|
||||
* but that behavior is subject to change.
|
||||
*
|
||||
* An optional condition may be specified. If it is, then only
|
||||
* matched rows that satisfy the condtion will be updated. Any
|
||||
* rows that do not satisfy the condition will be left as they
|
||||
* are. Failing to satisfy the condition does not cause a
|
||||
* "matched row" to become a "not matched" row.
|
||||
*
|
||||
* The condition should be an SQL string. Use the prefix
|
||||
* target. to refer to rows in the target table (old data)
|
||||
* and the prefix source. to refer to rows in the source
|
||||
* table (new data).
|
||||
*
|
||||
* For example, "target.last_update < source.last_update"
|
||||
*/
|
||||
whenMatchedUpdateAll(options?: { where: string }): MergeInsertBuilder {
|
||||
return new MergeInsertBuilder(
|
||||
this.#native.whenMatchedUpdateAll(options?.where),
|
||||
);
|
||||
}
|
||||
/**
|
||||
* Rows that exist only in the source table (new data) should
|
||||
* be inserted into the target table.
|
||||
*/
|
||||
whenNotMatchedInsertAll(): MergeInsertBuilder {
|
||||
return new MergeInsertBuilder(this.#native.whenNotMatchedInsertAll());
|
||||
}
|
||||
/**
|
||||
* Rows that exist only in the target table (old data) will be
|
||||
* deleted. An optional condition can be provided to limit what
|
||||
* data is deleted.
|
||||
*
|
||||
* @param options.where - An optional condition to limit what data is deleted
|
||||
*/
|
||||
whenNotMatchedBySourceDelete(options?: {
|
||||
where: string;
|
||||
}): MergeInsertBuilder {
|
||||
return new MergeInsertBuilder(
|
||||
this.#native.whenNotMatchedBySourceDelete(options?.where),
|
||||
);
|
||||
}
|
||||
/**
|
||||
* Executes the merge insert operation
|
||||
*
|
||||
* Nothing is returned but the `Table` is updated
|
||||
*/
|
||||
async execute(data: Data): Promise<void> {
|
||||
const buffer = await fromDataToBuffer(data);
|
||||
await this.#native.execute(buffer);
|
||||
}
|
||||
}
|
||||
@@ -1,221 +0,0 @@
|
||||
// Copyright 2023 LanceDB Developers.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
import axios, {
|
||||
AxiosError,
|
||||
type AxiosResponse,
|
||||
type ResponseType,
|
||||
} from "axios";
|
||||
import { Table as ArrowTable } from "../arrow";
|
||||
import { tableFromIPC } from "../arrow";
|
||||
import { VectorQuery } from "../query";
|
||||
|
||||
export class RestfulLanceDBClient {
|
||||
#dbName: string;
|
||||
#region: string;
|
||||
#apiKey: string;
|
||||
#hostOverride?: string;
|
||||
#closed: boolean = false;
|
||||
#connectionTimeout: number = 12 * 1000; // 12 seconds;
|
||||
#readTimeout: number = 30 * 1000; // 30 seconds;
|
||||
#session?: import("axios").AxiosInstance;
|
||||
|
||||
constructor(
|
||||
dbName: string,
|
||||
apiKey: string,
|
||||
region: string,
|
||||
hostOverride?: string,
|
||||
connectionTimeout?: number,
|
||||
readTimeout?: number,
|
||||
) {
|
||||
this.#dbName = dbName;
|
||||
this.#apiKey = apiKey;
|
||||
this.#region = region;
|
||||
this.#hostOverride = hostOverride ?? this.#hostOverride;
|
||||
this.#connectionTimeout = connectionTimeout ?? this.#connectionTimeout;
|
||||
this.#readTimeout = readTimeout ?? this.#readTimeout;
|
||||
}
|
||||
|
||||
// todo: cache the session.
|
||||
get session(): import("axios").AxiosInstance {
|
||||
if (this.#session !== undefined) {
|
||||
return this.#session;
|
||||
} else {
|
||||
return axios.create({
|
||||
baseURL: this.url,
|
||||
headers: {
|
||||
// biome-ignore lint/style/useNamingConvention: external api
|
||||
Authorization: `Bearer ${this.#apiKey}`,
|
||||
},
|
||||
transformResponse: decodeErrorData,
|
||||
timeout: this.#connectionTimeout,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
get url(): string {
|
||||
return (
|
||||
this.#hostOverride ??
|
||||
`https://${this.#dbName}.${this.#region}.api.lancedb.com`
|
||||
);
|
||||
}
|
||||
|
||||
get headers(): { [key: string]: string } {
|
||||
const headers: { [key: string]: string } = {
|
||||
"x-api-key": this.#apiKey,
|
||||
"x-request-id": "na",
|
||||
};
|
||||
if (this.#region == "local") {
|
||||
headers["Host"] = `${this.#dbName}.${this.#region}.api.lancedb.com`;
|
||||
}
|
||||
if (this.#hostOverride) {
|
||||
headers["x-lancedb-database"] = this.#dbName;
|
||||
}
|
||||
return headers;
|
||||
}
|
||||
|
||||
isOpen(): boolean {
|
||||
return !this.#closed;
|
||||
}
|
||||
|
||||
private checkNotClosed(): void {
|
||||
if (this.#closed) {
|
||||
throw new Error("Connection is closed");
|
||||
}
|
||||
}
|
||||
|
||||
close(): void {
|
||||
this.#session = undefined;
|
||||
this.#closed = true;
|
||||
}
|
||||
|
||||
// biome-ignore lint/suspicious/noExplicitAny: <explanation>
|
||||
async get(uri: string, params?: Record<string, any>): Promise<any> {
|
||||
this.checkNotClosed();
|
||||
uri = new URL(uri, this.url).toString();
|
||||
let response;
|
||||
try {
|
||||
response = await this.session.get(uri, {
|
||||
headers: this.headers,
|
||||
params,
|
||||
});
|
||||
} catch (e) {
|
||||
if (e instanceof AxiosError) {
|
||||
response = e.response;
|
||||
} else {
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
|
||||
RestfulLanceDBClient.checkStatus(response!);
|
||||
return response!.data;
|
||||
}
|
||||
|
||||
// biome-ignore lint/suspicious/noExplicitAny: api response
|
||||
async post(uri: string, body?: any): Promise<any>;
|
||||
async post(
|
||||
uri: string,
|
||||
// biome-ignore lint/suspicious/noExplicitAny: api request
|
||||
body: any,
|
||||
additional: {
|
||||
config?: { responseType: "arraybuffer" };
|
||||
headers?: Record<string, string>;
|
||||
params?: Record<string, string>;
|
||||
},
|
||||
): Promise<Buffer>;
|
||||
async post(
|
||||
uri: string,
|
||||
// biome-ignore lint/suspicious/noExplicitAny: api request
|
||||
body?: any,
|
||||
additional?: {
|
||||
config?: { responseType: ResponseType };
|
||||
headers?: Record<string, string>;
|
||||
params?: Record<string, string>;
|
||||
},
|
||||
// biome-ignore lint/suspicious/noExplicitAny: api response
|
||||
): Promise<any> {
|
||||
this.checkNotClosed();
|
||||
uri = new URL(uri, this.url).toString();
|
||||
additional = Object.assign(
|
||||
{ config: { responseType: "json" } },
|
||||
additional,
|
||||
);
|
||||
|
||||
const headers = { ...this.headers, ...additional.headers };
|
||||
|
||||
if (!headers["Content-Type"]) {
|
||||
headers["Content-Type"] = "application/json";
|
||||
}
|
||||
let response;
|
||||
try {
|
||||
response = await this.session.post(uri, body, {
|
||||
headers,
|
||||
responseType: additional!.config!.responseType,
|
||||
params: new Map(Object.entries(additional.params ?? {})),
|
||||
});
|
||||
} catch (e) {
|
||||
if (e instanceof AxiosError) {
|
||||
response = e.response;
|
||||
} else {
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
RestfulLanceDBClient.checkStatus(response!);
|
||||
if (additional!.config!.responseType === "arraybuffer") {
|
||||
return response!.data;
|
||||
} else {
|
||||
return JSON.parse(response!.data);
|
||||
}
|
||||
}
|
||||
|
||||
async listTables(limit = 10, pageToken = ""): Promise<string[]> {
|
||||
const json = await this.get("/v1/table", { limit, pageToken });
|
||||
return json.tables;
|
||||
}
|
||||
|
||||
async query(tableName: string, query: VectorQuery): Promise<ArrowTable> {
|
||||
const tbl = await this.post(`/v1/table/${tableName}/query`, query, {
|
||||
config: {
|
||||
responseType: "arraybuffer",
|
||||
},
|
||||
});
|
||||
return tableFromIPC(tbl);
|
||||
}
|
||||
|
||||
static checkStatus(response: AxiosResponse): void {
|
||||
if (response.status === 404) {
|
||||
throw new Error(`Not found: ${response.data}`);
|
||||
} else if (response.status >= 400 && response.status < 500) {
|
||||
throw new Error(
|
||||
`Bad Request: ${response.status}, error: ${response.data}`,
|
||||
);
|
||||
} else if (response.status >= 500 && response.status < 600) {
|
||||
throw new Error(
|
||||
`Internal Server Error: ${response.status}, error: ${response.data}`,
|
||||
);
|
||||
} else if (response.status !== 200) {
|
||||
throw new Error(
|
||||
`Unknown Error: ${response.status}, error: ${response.data}`,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
function decodeErrorData(data: unknown) {
|
||||
if (Buffer.isBuffer(data)) {
|
||||
const decoded = data.toString("utf-8");
|
||||
return decoded;
|
||||
}
|
||||
return data;
|
||||
}
|
||||
@@ -1,187 +0,0 @@
|
||||
import { Schema } from "apache-arrow";
|
||||
import { Data, fromTableToStreamBuffer, makeEmptyTable } from "../arrow";
|
||||
import {
|
||||
Connection,
|
||||
CreateTableOptions,
|
||||
OpenTableOptions,
|
||||
TableNamesOptions,
|
||||
} from "../connection";
|
||||
import { Table } from "../table";
|
||||
import { TTLCache } from "../util";
|
||||
import { RestfulLanceDBClient } from "./client";
|
||||
import { RemoteTable } from "./table";
|
||||
|
||||
export interface RemoteConnectionOptions {
|
||||
apiKey?: string;
|
||||
region?: string;
|
||||
hostOverride?: string;
|
||||
connectionTimeout?: number;
|
||||
readTimeout?: number;
|
||||
}
|
||||
|
||||
export class RemoteConnection extends Connection {
|
||||
#dbName: string;
|
||||
#apiKey: string;
|
||||
#region: string;
|
||||
#client: RestfulLanceDBClient;
|
||||
#tableCache = new TTLCache(300_000);
|
||||
|
||||
constructor(
|
||||
url: string,
|
||||
{
|
||||
apiKey,
|
||||
region,
|
||||
hostOverride,
|
||||
connectionTimeout,
|
||||
readTimeout,
|
||||
}: RemoteConnectionOptions,
|
||||
) {
|
||||
super();
|
||||
apiKey = apiKey ?? process.env.LANCEDB_API_KEY;
|
||||
region = region ?? process.env.LANCEDB_REGION;
|
||||
|
||||
if (!apiKey) {
|
||||
throw new Error("apiKey is required when connecting to LanceDB Cloud");
|
||||
}
|
||||
|
||||
if (!region) {
|
||||
throw new Error("region is required when connecting to LanceDB Cloud");
|
||||
}
|
||||
|
||||
const parsed = new URL(url);
|
||||
if (parsed.protocol !== "db:") {
|
||||
throw new Error(
|
||||
`invalid protocol: ${parsed.protocol}, only accepts db://`,
|
||||
);
|
||||
}
|
||||
|
||||
this.#dbName = parsed.hostname;
|
||||
this.#apiKey = apiKey;
|
||||
this.#region = region;
|
||||
this.#client = new RestfulLanceDBClient(
|
||||
this.#dbName,
|
||||
this.#apiKey,
|
||||
this.#region,
|
||||
hostOverride,
|
||||
connectionTimeout,
|
||||
readTimeout,
|
||||
);
|
||||
}
|
||||
|
||||
isOpen(): boolean {
|
||||
return this.#client.isOpen();
|
||||
}
|
||||
close(): void {
|
||||
return this.#client.close();
|
||||
}
|
||||
|
||||
display(): string {
|
||||
return `RemoteConnection(${this.#dbName})`;
|
||||
}
|
||||
|
||||
async tableNames(options?: Partial<TableNamesOptions>): Promise<string[]> {
|
||||
const response = await this.#client.get("/v1/table/", {
|
||||
limit: options?.limit ?? 10,
|
||||
// biome-ignore lint/style/useNamingConvention: <explanation>
|
||||
page_token: options?.startAfter ?? "",
|
||||
});
|
||||
const body = await response.body();
|
||||
for (const table of body.tables) {
|
||||
this.#tableCache.set(table, true);
|
||||
}
|
||||
return body.tables;
|
||||
}
|
||||
|
||||
async openTable(
|
||||
name: string,
|
||||
_options?: Partial<OpenTableOptions> | undefined,
|
||||
): Promise<Table> {
|
||||
if (this.#tableCache.get(name) === undefined) {
|
||||
await this.#client.post(
|
||||
`/v1/table/${encodeURIComponent(name)}/describe/`,
|
||||
);
|
||||
this.#tableCache.set(name, true);
|
||||
}
|
||||
return new RemoteTable(this.#client, name, this.#dbName);
|
||||
}
|
||||
|
||||
async createTable(
|
||||
tableName: string,
|
||||
data: Data,
|
||||
options?: Partial<CreateTableOptions> | undefined,
|
||||
): Promise<Table> {
|
||||
if (options?.mode) {
|
||||
console.warn(
|
||||
"option 'mode' is not supported in LanceDB Cloud",
|
||||
"LanceDB Cloud only supports the default 'create' mode.",
|
||||
"If the table already exists, an error will be thrown.",
|
||||
);
|
||||
}
|
||||
if (options?.embeddingFunction) {
|
||||
console.warn(
|
||||
"embedding_functions is not yet supported on LanceDB Cloud.",
|
||||
"Please vote https://github.com/lancedb/lancedb/issues/626 ",
|
||||
"for this feature.",
|
||||
);
|
||||
}
|
||||
|
||||
const { buf } = await Table.parseTableData(
|
||||
data,
|
||||
options,
|
||||
true /** streaming */,
|
||||
);
|
||||
|
||||
await this.#client.post(
|
||||
`/v1/table/${encodeURIComponent(tableName)}/create/`,
|
||||
buf,
|
||||
{
|
||||
config: {
|
||||
responseType: "arraybuffer",
|
||||
},
|
||||
headers: { "Content-Type": "application/vnd.apache.arrow.stream" },
|
||||
},
|
||||
);
|
||||
this.#tableCache.set(tableName, true);
|
||||
return new RemoteTable(this.#client, tableName, this.#dbName);
|
||||
}
|
||||
|
||||
async createEmptyTable(
|
||||
name: string,
|
||||
schema: Schema,
|
||||
options?: Partial<CreateTableOptions> | undefined,
|
||||
): Promise<Table> {
|
||||
if (options?.mode) {
|
||||
console.warn(`mode is not supported on LanceDB Cloud`);
|
||||
}
|
||||
|
||||
if (options?.embeddingFunction) {
|
||||
console.warn(
|
||||
"embeddingFunction is not yet supported on LanceDB Cloud.",
|
||||
"Please vote https://github.com/lancedb/lancedb/issues/626 ",
|
||||
"for this feature.",
|
||||
);
|
||||
}
|
||||
const emptyTable = makeEmptyTable(schema);
|
||||
const buf = await fromTableToStreamBuffer(emptyTable);
|
||||
|
||||
await this.#client.post(
|
||||
`/v1/table/${encodeURIComponent(name)}/create/`,
|
||||
buf,
|
||||
{
|
||||
config: {
|
||||
responseType: "arraybuffer",
|
||||
},
|
||||
headers: { "Content-Type": "application/vnd.apache.arrow.stream" },
|
||||
},
|
||||
);
|
||||
|
||||
this.#tableCache.set(name, true);
|
||||
return new RemoteTable(this.#client, name, this.#dbName);
|
||||
}
|
||||
|
||||
async dropTable(name: string): Promise<void> {
|
||||
await this.#client.post(`/v1/table/${encodeURIComponent(name)}/drop/`);
|
||||
|
||||
this.#tableCache.delete(name);
|
||||
}
|
||||
}
|
||||
@@ -1,3 +0,0 @@
|
||||
export { RestfulLanceDBClient } from "./client";
|
||||
export { type RemoteConnectionOptions, RemoteConnection } from "./connection";
|
||||
export { RemoteTable } from "./table";
|
||||
@@ -1,164 +0,0 @@
|
||||
// Copyright 2023 LanceDB Developers.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
import { Table as ArrowTable } from "apache-arrow";
|
||||
|
||||
import { Data, IntoVector } from "../arrow";
|
||||
|
||||
import { CreateTableOptions } from "../connection";
|
||||
import { IndexOptions } from "../indices";
|
||||
import { MergeInsertBuilder } from "../merge";
|
||||
import { VectorQuery } from "../query";
|
||||
import { AddDataOptions, Table, UpdateOptions } from "../table";
|
||||
import { RestfulLanceDBClient } from "./client";
|
||||
|
||||
export class RemoteTable extends Table {
|
||||
#client: RestfulLanceDBClient;
|
||||
#name: string;
|
||||
|
||||
// Used in the display() method
|
||||
#dbName: string;
|
||||
|
||||
get #tablePrefix() {
|
||||
return `/v1/table/${encodeURIComponent(this.#name)}/`;
|
||||
}
|
||||
|
||||
public constructor(
|
||||
client: RestfulLanceDBClient,
|
||||
tableName: string,
|
||||
dbName: string,
|
||||
) {
|
||||
super();
|
||||
this.#client = client;
|
||||
this.#name = tableName;
|
||||
this.#dbName = dbName;
|
||||
}
|
||||
|
||||
isOpen(): boolean {
|
||||
return !this.#client.isOpen();
|
||||
}
|
||||
|
||||
close(): void {
|
||||
this.#client.close();
|
||||
}
|
||||
|
||||
display(): string {
|
||||
return `RemoteTable(${this.#dbName}; ${this.#name})`;
|
||||
}
|
||||
|
||||
async schema(): Promise<import("apache-arrow").Schema> {
|
||||
const resp = await this.#client.post(`${this.#tablePrefix}/describe/`);
|
||||
// TODO: parse this into a valid arrow schema
|
||||
return resp.schema;
|
||||
}
|
||||
async add(data: Data, options?: Partial<AddDataOptions>): Promise<void> {
|
||||
const { buf, mode } = await Table.parseTableData(
|
||||
data,
|
||||
options as CreateTableOptions,
|
||||
true,
|
||||
);
|
||||
await this.#client.post(`${this.#tablePrefix}/insert/`, buf, {
|
||||
params: {
|
||||
mode,
|
||||
},
|
||||
headers: {
|
||||
"Content-Type": "application/vnd.apache.arrow.stream",
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
async update(
|
||||
updates: Map<string, string> | Record<string, string>,
|
||||
options?: Partial<UpdateOptions>,
|
||||
): Promise<void> {
|
||||
await this.#client.post(`${this.#tablePrefix}/update/`, {
|
||||
predicate: options?.where ?? null,
|
||||
updates: Object.entries(updates).map(([key, value]) => [key, value]),
|
||||
});
|
||||
}
|
||||
async countRows(filter?: unknown): Promise<number> {
|
||||
const payload = { predicate: filter };
|
||||
return await this.#client.post(`${this.#tablePrefix}/count_rows/`, payload);
|
||||
}
|
||||
|
||||
async delete(predicate: unknown): Promise<void> {
|
||||
const payload = { predicate };
|
||||
await this.#client.post(`${this.#tablePrefix}/delete/`, payload);
|
||||
}
|
||||
async createIndex(
|
||||
column: string,
|
||||
options?: Partial<IndexOptions>,
|
||||
): Promise<void> {
|
||||
if (options !== undefined) {
|
||||
console.warn("options are not yet supported on the LanceDB cloud");
|
||||
}
|
||||
const indexType = "vector";
|
||||
const metric = "L2";
|
||||
const data = {
|
||||
column,
|
||||
// biome-ignore lint/style/useNamingConvention: external API
|
||||
index_type: indexType,
|
||||
// biome-ignore lint/style/useNamingConvention: external API
|
||||
metric_type: metric,
|
||||
};
|
||||
await this.#client.post(`${this.#tablePrefix}/create_index`, data);
|
||||
}
|
||||
query(): import("..").Query {
|
||||
throw new Error("query() is not yet supported on the LanceDB cloud");
|
||||
}
|
||||
search(query: IntoVector): VectorQuery;
|
||||
search(query: string): Promise<VectorQuery>;
|
||||
search(_query: string | IntoVector): VectorQuery | Promise<VectorQuery> {
|
||||
throw new Error("search() is not yet supported on the LanceDB cloud");
|
||||
}
|
||||
vectorSearch(_vector: unknown): import("..").VectorQuery {
|
||||
throw new Error("vectorSearch() is not yet supported on the LanceDB cloud");
|
||||
}
|
||||
addColumns(_newColumnTransforms: unknown): Promise<void> {
|
||||
throw new Error("addColumns() is not yet supported on the LanceDB cloud");
|
||||
}
|
||||
alterColumns(_columnAlterations: unknown): Promise<void> {
|
||||
throw new Error("alterColumns() is not yet supported on the LanceDB cloud");
|
||||
}
|
||||
dropColumns(_columnNames: unknown): Promise<void> {
|
||||
throw new Error("dropColumns() is not yet supported on the LanceDB cloud");
|
||||
}
|
||||
async version(): Promise<number> {
|
||||
const resp = await this.#client.post(`${this.#tablePrefix}/describe/`);
|
||||
return resp.version;
|
||||
}
|
||||
checkout(_version: unknown): Promise<void> {
|
||||
throw new Error("checkout() is not yet supported on the LanceDB cloud");
|
||||
}
|
||||
checkoutLatest(): Promise<void> {
|
||||
throw new Error(
|
||||
"checkoutLatest() is not yet supported on the LanceDB cloud",
|
||||
);
|
||||
}
|
||||
restore(): Promise<void> {
|
||||
throw new Error("restore() is not yet supported on the LanceDB cloud");
|
||||
}
|
||||
optimize(_options?: unknown): Promise<import("../native").OptimizeStats> {
|
||||
throw new Error("optimize() is not yet supported on the LanceDB cloud");
|
||||
}
|
||||
async listIndices(): Promise<import("../native").IndexConfig[]> {
|
||||
return await this.#client.post(`${this.#tablePrefix}/index/list/`);
|
||||
}
|
||||
toArrow(): Promise<ArrowTable> {
|
||||
throw new Error("toArrow() is not yet supported on the LanceDB cloud");
|
||||
}
|
||||
mergeInsert(_on: string | string[]): MergeInsertBuilder {
|
||||
throw new Error("mergeInsert() is not yet supported on the LanceDB cloud");
|
||||
}
|
||||
}
|
||||
@@ -18,17 +18,11 @@ import {
|
||||
IntoVector,
|
||||
Schema,
|
||||
fromDataToBuffer,
|
||||
fromTableToBuffer,
|
||||
fromTableToStreamBuffer,
|
||||
isArrowTable,
|
||||
makeArrowTable,
|
||||
tableFromIPC,
|
||||
} from "./arrow";
|
||||
import { CreateTableOptions } from "./connection";
|
||||
|
||||
import { EmbeddingFunctionConfig, getRegistry } from "./embedding/registry";
|
||||
import { IndexOptions } from "./indices";
|
||||
import { MergeInsertBuilder } from "./merge";
|
||||
import {
|
||||
AddColumnsSql,
|
||||
ColumnAlteration,
|
||||
@@ -94,13 +88,19 @@ export interface OptimizeOptions {
|
||||
* Closing a table is optional. It not closed, it will be closed when it is garbage
|
||||
* collected.
|
||||
*/
|
||||
export abstract class Table {
|
||||
[Symbol.for("nodejs.util.inspect.custom")](): string {
|
||||
return this.display();
|
||||
export class Table {
|
||||
private readonly inner: _NativeTable;
|
||||
|
||||
/** Construct a Table. Internal use only. */
|
||||
constructor(inner: _NativeTable) {
|
||||
this.inner = inner;
|
||||
}
|
||||
|
||||
/** Return true if the table has not been closed */
|
||||
abstract isOpen(): boolean;
|
||||
isOpen(): boolean {
|
||||
return this.inner.isOpen();
|
||||
}
|
||||
|
||||
/**
|
||||
* Close the table, releasing any underlying resources.
|
||||
*
|
||||
@@ -108,16 +108,48 @@ export abstract class Table {
|
||||
*
|
||||
* Any attempt to use the table after it is closed will result in an error.
|
||||
*/
|
||||
abstract close(): void;
|
||||
close(): void {
|
||||
this.inner.close();
|
||||
}
|
||||
|
||||
/** Return a brief description of the table */
|
||||
abstract display(): string;
|
||||
display(): string {
|
||||
return this.inner.display();
|
||||
}
|
||||
|
||||
async #getEmbeddingFunctions(): Promise<
|
||||
Map<string, EmbeddingFunctionConfig>
|
||||
> {
|
||||
const schema = await this.schema();
|
||||
const registry = getRegistry();
|
||||
return registry.parseFunctions(schema.metadata);
|
||||
}
|
||||
|
||||
/** Get the schema of the table. */
|
||||
abstract schema(): Promise<Schema>;
|
||||
async schema(): Promise<Schema> {
|
||||
const schemaBuf = await this.inner.schema();
|
||||
const tbl = tableFromIPC(schemaBuf);
|
||||
return tbl.schema;
|
||||
}
|
||||
|
||||
/**
|
||||
* Insert records into this Table.
|
||||
* @param {Data} data Records to be inserted into the Table
|
||||
*/
|
||||
abstract add(data: Data, options?: Partial<AddDataOptions>): Promise<void>;
|
||||
async add(data: Data, options?: Partial<AddDataOptions>): Promise<void> {
|
||||
const mode = options?.mode ?? "append";
|
||||
const schema = await this.schema();
|
||||
const registry = getRegistry();
|
||||
const functions = registry.parseFunctions(schema.metadata);
|
||||
|
||||
const buffer = await fromDataToBuffer(
|
||||
data,
|
||||
functions.values().next().value,
|
||||
schema,
|
||||
);
|
||||
await this.inner.add(buffer, mode);
|
||||
}
|
||||
|
||||
/**
|
||||
* Update existing records in the Table
|
||||
*
|
||||
@@ -143,14 +175,30 @@ export abstract class Table {
|
||||
* @param {Partial<UpdateOptions>} options - additional options to control
|
||||
* the update behavior
|
||||
*/
|
||||
abstract update(
|
||||
async update(
|
||||
updates: Map<string, string> | Record<string, string>,
|
||||
options?: Partial<UpdateOptions>,
|
||||
): Promise<void>;
|
||||
) {
|
||||
const onlyIf = options?.where;
|
||||
let columns: [string, string][];
|
||||
if (updates instanceof Map) {
|
||||
columns = Array.from(updates.entries());
|
||||
} else {
|
||||
columns = Object.entries(updates);
|
||||
}
|
||||
await this.inner.update(onlyIf, columns);
|
||||
}
|
||||
|
||||
/** Count the total number of rows in the dataset. */
|
||||
abstract countRows(filter?: string): Promise<number>;
|
||||
async countRows(filter?: string): Promise<number> {
|
||||
return await this.inner.countRows(filter);
|
||||
}
|
||||
|
||||
/** Delete the rows that satisfy the predicate. */
|
||||
abstract delete(predicate: string): Promise<void>;
|
||||
async delete(predicate: string): Promise<void> {
|
||||
await this.inner.delete(predicate);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create an index to speed up queries.
|
||||
*
|
||||
@@ -177,10 +225,13 @@ export abstract class Table {
|
||||
* // Or create a Scalar index
|
||||
* await table.createIndex("my_float_col");
|
||||
*/
|
||||
abstract createIndex(
|
||||
column: string,
|
||||
options?: Partial<IndexOptions>,
|
||||
): Promise<void>;
|
||||
async createIndex(column: string, options?: Partial<IndexOptions>) {
|
||||
// Bit of a hack to get around the fact that TS has no package-scope.
|
||||
// biome-ignore lint/suspicious/noExplicitAny: skip
|
||||
const nativeIndex = (options?.config as any)?.inner;
|
||||
await this.inner.createIndex(nativeIndex, column, options?.replace);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a {@link Query} Builder.
|
||||
*
|
||||
@@ -231,20 +282,44 @@ export abstract class Table {
|
||||
* }
|
||||
* @returns {Query} A builder that can be used to parameterize the query
|
||||
*/
|
||||
abstract query(): Query;
|
||||
query(): Query {
|
||||
return new Query(this.inner);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a search query to find the nearest neighbors
|
||||
* of the given query vector
|
||||
* @param {string} query - the query. This will be converted to a vector using the table's provided embedding function
|
||||
* @rejects {Error} If no embedding functions are defined in the table
|
||||
*/
|
||||
abstract search(query: string): Promise<VectorQuery>;
|
||||
search(query: string): Promise<VectorQuery>;
|
||||
/**
|
||||
* Create a search query to find the nearest neighbors
|
||||
* of the given query vector
|
||||
* @param {IntoVector} query - the query vector
|
||||
*/
|
||||
abstract search(query: IntoVector): VectorQuery;
|
||||
search(query: IntoVector): VectorQuery;
|
||||
search(query: string | IntoVector): Promise<VectorQuery> | VectorQuery {
|
||||
if (typeof query !== "string") {
|
||||
return this.vectorSearch(query);
|
||||
} else {
|
||||
return this.#getEmbeddingFunctions().then(async (functions) => {
|
||||
// TODO: Support multiple embedding functions
|
||||
const embeddingFunc: EmbeddingFunctionConfig | undefined = functions
|
||||
.values()
|
||||
.next().value;
|
||||
if (!embeddingFunc) {
|
||||
return Promise.reject(
|
||||
new Error("No embedding functions are defined in the table"),
|
||||
);
|
||||
}
|
||||
const embeddings =
|
||||
await embeddingFunc.function.computeQueryEmbeddings(query);
|
||||
return this.query().nearestTo(embeddings);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Search the table with a given query vector.
|
||||
*
|
||||
@@ -252,7 +327,11 @@ export abstract class Table {
|
||||
* is the same thing as calling `nearestTo` on the builder returned
|
||||
* by `query`. @see {@link Query#nearestTo} for more details.
|
||||
*/
|
||||
abstract vectorSearch(vector: IntoVector): VectorQuery;
|
||||
vectorSearch(vector: IntoVector): VectorQuery {
|
||||
return this.query().nearestTo(vector);
|
||||
}
|
||||
|
||||
// TODO: Support BatchUDF
|
||||
/**
|
||||
* Add new columns with defined values.
|
||||
* @param {AddColumnsSql[]} newColumnTransforms pairs of column names and
|
||||
@@ -260,14 +339,19 @@ export abstract class Table {
|
||||
* expressions will be evaluated for each row in the table, and can
|
||||
* reference existing columns in the table.
|
||||
*/
|
||||
abstract addColumns(newColumnTransforms: AddColumnsSql[]): Promise<void>;
|
||||
async addColumns(newColumnTransforms: AddColumnsSql[]): Promise<void> {
|
||||
await this.inner.addColumns(newColumnTransforms);
|
||||
}
|
||||
|
||||
/**
|
||||
* Alter the name or nullability of columns.
|
||||
* @param {ColumnAlteration[]} columnAlterations One or more alterations to
|
||||
* apply to columns.
|
||||
*/
|
||||
abstract alterColumns(columnAlterations: ColumnAlteration[]): Promise<void>;
|
||||
async alterColumns(columnAlterations: ColumnAlteration[]): Promise<void> {
|
||||
await this.inner.alterColumns(columnAlterations);
|
||||
}
|
||||
|
||||
/**
|
||||
* Drop one or more columns from the dataset
|
||||
*
|
||||
@@ -279,10 +363,15 @@ export abstract class Table {
|
||||
* be nested column references (e.g. "a.b.c") or top-level column names
|
||||
* (e.g. "a").
|
||||
*/
|
||||
abstract dropColumns(columnNames: string[]): Promise<void>;
|
||||
/** Retrieve the version of the table */
|
||||
async dropColumns(columnNames: string[]): Promise<void> {
|
||||
await this.inner.dropColumns(columnNames);
|
||||
}
|
||||
|
||||
/** Retrieve the version of the table */
|
||||
async version(): Promise<number> {
|
||||
return await this.inner.version();
|
||||
}
|
||||
|
||||
abstract version(): Promise<number>;
|
||||
/**
|
||||
* Checks out a specific version of the table _This is an in-place operation._
|
||||
*
|
||||
@@ -308,14 +397,19 @@ export abstract class Table {
|
||||
* console.log(await table.version()); // 2
|
||||
* ```
|
||||
*/
|
||||
abstract checkout(version: number): Promise<void>;
|
||||
async checkout(version: number): Promise<void> {
|
||||
await this.inner.checkout(version);
|
||||
}
|
||||
|
||||
/**
|
||||
* Checkout the latest version of the table. _This is an in-place operation._
|
||||
*
|
||||
* The table will be set back into standard mode, and will track the latest
|
||||
* version of the table.
|
||||
*/
|
||||
abstract checkoutLatest(): Promise<void>;
|
||||
async checkoutLatest(): Promise<void> {
|
||||
await this.inner.checkoutLatest();
|
||||
}
|
||||
|
||||
/**
|
||||
* Restore the table to the currently checked out version
|
||||
@@ -329,7 +423,10 @@ export abstract class Table {
|
||||
* Once the operation concludes the table will no longer be in a checked
|
||||
* out state and the read_consistency_interval, if any, will apply.
|
||||
*/
|
||||
abstract restore(): Promise<void>;
|
||||
async restore(): Promise<void> {
|
||||
await this.inner.restore();
|
||||
}
|
||||
|
||||
/**
|
||||
* Optimize the on-disk data and indices for better performance.
|
||||
*
|
||||
@@ -360,191 +457,6 @@ export abstract class Table {
|
||||
* you have added or modified 100,000 or more records or run more than 20 data
|
||||
* modification operations.
|
||||
*/
|
||||
abstract optimize(options?: Partial<OptimizeOptions>): Promise<OptimizeStats>;
|
||||
/** List all indices that have been created with {@link Table.createIndex} */
|
||||
abstract listIndices(): Promise<IndexConfig[]>;
|
||||
/** Return the table as an arrow table */
|
||||
abstract toArrow(): Promise<ArrowTable>;
|
||||
|
||||
abstract mergeInsert(on: string | string[]): MergeInsertBuilder;
|
||||
|
||||
static async parseTableData(
|
||||
// biome-ignore lint/suspicious/noExplicitAny: <explanation>
|
||||
data: Record<string, unknown>[] | ArrowTable<any>,
|
||||
options?: Partial<CreateTableOptions>,
|
||||
streaming = false,
|
||||
) {
|
||||
let mode: string = options?.mode ?? "create";
|
||||
const existOk = options?.existOk ?? false;
|
||||
|
||||
if (mode === "create" && existOk) {
|
||||
mode = "exist_ok";
|
||||
}
|
||||
|
||||
let table: ArrowTable;
|
||||
if (isArrowTable(data)) {
|
||||
table = data;
|
||||
} else {
|
||||
table = makeArrowTable(data, options);
|
||||
}
|
||||
if (streaming) {
|
||||
const buf = await fromTableToStreamBuffer(
|
||||
table,
|
||||
options?.embeddingFunction,
|
||||
options?.schema,
|
||||
);
|
||||
return { buf, mode };
|
||||
} else {
|
||||
const buf = await fromTableToBuffer(
|
||||
table,
|
||||
options?.embeddingFunction,
|
||||
options?.schema,
|
||||
);
|
||||
return { buf, mode };
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
export class LocalTable extends Table {
|
||||
private readonly inner: _NativeTable;
|
||||
|
||||
constructor(inner: _NativeTable) {
|
||||
super();
|
||||
this.inner = inner;
|
||||
}
|
||||
|
||||
isOpen(): boolean {
|
||||
return this.inner.isOpen();
|
||||
}
|
||||
|
||||
close(): void {
|
||||
this.inner.close();
|
||||
}
|
||||
|
||||
display(): string {
|
||||
return this.inner.display();
|
||||
}
|
||||
|
||||
private async getEmbeddingFunctions(): Promise<
|
||||
Map<string, EmbeddingFunctionConfig>
|
||||
> {
|
||||
const schema = await this.schema();
|
||||
const registry = getRegistry();
|
||||
return registry.parseFunctions(schema.metadata);
|
||||
}
|
||||
|
||||
/** Get the schema of the table. */
|
||||
async schema(): Promise<Schema> {
|
||||
const schemaBuf = await this.inner.schema();
|
||||
const tbl = tableFromIPC(schemaBuf);
|
||||
return tbl.schema;
|
||||
}
|
||||
|
||||
async add(data: Data, options?: Partial<AddDataOptions>): Promise<void> {
|
||||
const mode = options?.mode ?? "append";
|
||||
const schema = await this.schema();
|
||||
const registry = getRegistry();
|
||||
const functions = registry.parseFunctions(schema.metadata);
|
||||
|
||||
const buffer = await fromDataToBuffer(
|
||||
data,
|
||||
functions.values().next().value,
|
||||
schema,
|
||||
);
|
||||
await this.inner.add(buffer, mode);
|
||||
}
|
||||
|
||||
async update(
|
||||
updates: Map<string, string> | Record<string, string>,
|
||||
options?: Partial<UpdateOptions>,
|
||||
) {
|
||||
const onlyIf = options?.where;
|
||||
let columns: [string, string][];
|
||||
if (updates instanceof Map) {
|
||||
columns = Array.from(updates.entries());
|
||||
} else {
|
||||
columns = Object.entries(updates);
|
||||
}
|
||||
await this.inner.update(onlyIf, columns);
|
||||
}
|
||||
|
||||
async countRows(filter?: string): Promise<number> {
|
||||
return await this.inner.countRows(filter);
|
||||
}
|
||||
|
||||
async delete(predicate: string): Promise<void> {
|
||||
await this.inner.delete(predicate);
|
||||
}
|
||||
|
||||
async createIndex(column: string, options?: Partial<IndexOptions>) {
|
||||
// Bit of a hack to get around the fact that TS has no package-scope.
|
||||
// biome-ignore lint/suspicious/noExplicitAny: skip
|
||||
const nativeIndex = (options?.config as any)?.inner;
|
||||
await this.inner.createIndex(nativeIndex, column, options?.replace);
|
||||
}
|
||||
|
||||
query(): Query {
|
||||
return new Query(this.inner);
|
||||
}
|
||||
|
||||
search(query: string): Promise<VectorQuery>;
|
||||
|
||||
search(query: IntoVector): VectorQuery;
|
||||
search(query: string | IntoVector): Promise<VectorQuery> | VectorQuery {
|
||||
if (typeof query !== "string") {
|
||||
return this.vectorSearch(query);
|
||||
} else {
|
||||
return this.getEmbeddingFunctions().then(async (functions) => {
|
||||
// TODO: Support multiple embedding functions
|
||||
const embeddingFunc: EmbeddingFunctionConfig | undefined = functions
|
||||
.values()
|
||||
.next().value;
|
||||
if (!embeddingFunc) {
|
||||
return Promise.reject(
|
||||
new Error("No embedding functions are defined in the table"),
|
||||
);
|
||||
}
|
||||
const embeddings =
|
||||
await embeddingFunc.function.computeQueryEmbeddings(query);
|
||||
return this.query().nearestTo(embeddings);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
vectorSearch(vector: IntoVector): VectorQuery {
|
||||
return this.query().nearestTo(vector);
|
||||
}
|
||||
|
||||
// TODO: Support BatchUDF
|
||||
|
||||
async addColumns(newColumnTransforms: AddColumnsSql[]): Promise<void> {
|
||||
await this.inner.addColumns(newColumnTransforms);
|
||||
}
|
||||
|
||||
async alterColumns(columnAlterations: ColumnAlteration[]): Promise<void> {
|
||||
await this.inner.alterColumns(columnAlterations);
|
||||
}
|
||||
|
||||
async dropColumns(columnNames: string[]): Promise<void> {
|
||||
await this.inner.dropColumns(columnNames);
|
||||
}
|
||||
|
||||
async version(): Promise<number> {
|
||||
return await this.inner.version();
|
||||
}
|
||||
|
||||
async checkout(version: number): Promise<void> {
|
||||
await this.inner.checkout(version);
|
||||
}
|
||||
|
||||
async checkoutLatest(): Promise<void> {
|
||||
await this.inner.checkoutLatest();
|
||||
}
|
||||
|
||||
async restore(): Promise<void> {
|
||||
await this.inner.restore();
|
||||
}
|
||||
|
||||
async optimize(options?: Partial<OptimizeOptions>): Promise<OptimizeStats> {
|
||||
let cleanupOlderThanMs;
|
||||
if (
|
||||
@@ -557,16 +469,13 @@ export class LocalTable extends Table {
|
||||
return await this.inner.optimize(cleanupOlderThanMs);
|
||||
}
|
||||
|
||||
/** List all indices that have been created with {@link Table.createIndex} */
|
||||
async listIndices(): Promise<IndexConfig[]> {
|
||||
return await this.inner.listIndices();
|
||||
}
|
||||
|
||||
/** Return the table as an arrow table */
|
||||
async toArrow(): Promise<ArrowTable> {
|
||||
return await this.query().toArrow();
|
||||
}
|
||||
|
||||
mergeInsert(on: string | string[]): MergeInsertBuilder {
|
||||
on = Array.isArray(on) ? on : [on];
|
||||
return new MergeInsertBuilder(this.inner.mergeInsert(on));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,35 +0,0 @@
|
||||
export class TTLCache {
|
||||
// biome-ignore lint/suspicious/noExplicitAny: <explanation>
|
||||
private readonly cache: Map<string, { value: any; expires: number }>;
|
||||
|
||||
/**
|
||||
* @param ttl Time to live in milliseconds
|
||||
*/
|
||||
constructor(private readonly ttl: number) {
|
||||
this.cache = new Map();
|
||||
}
|
||||
|
||||
// biome-ignore lint/suspicious/noExplicitAny: <explanation>
|
||||
get(key: string): any | undefined {
|
||||
const entry = this.cache.get(key);
|
||||
if (entry === undefined) {
|
||||
return undefined;
|
||||
}
|
||||
|
||||
if (entry.expires < Date.now()) {
|
||||
this.cache.delete(key);
|
||||
return undefined;
|
||||
}
|
||||
|
||||
return entry.value;
|
||||
}
|
||||
|
||||
// biome-ignore lint/suspicious/noExplicitAny: <explanation>
|
||||
set(key: string, value: any): void {
|
||||
this.cache.set(key, { value, expires: Date.now() + this.ttl });
|
||||
}
|
||||
|
||||
delete(key: string): void {
|
||||
this.cache.delete(key);
|
||||
}
|
||||
}
|
||||
75
nodejs/package-lock.json
generated
75
nodejs/package-lock.json
generated
@@ -1,12 +1,12 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb",
|
||||
"version": "0.5.2",
|
||||
"version": "0.5.0",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "@lancedb/lancedb",
|
||||
"version": "0.5.2",
|
||||
"version": "0.5.0",
|
||||
"cpu": [
|
||||
"x64",
|
||||
"arm64"
|
||||
@@ -18,10 +18,7 @@
|
||||
"win32"
|
||||
],
|
||||
"dependencies": {
|
||||
"@types/axios": "^0.14.0",
|
||||
"apache-arrow": "^15.0.0",
|
||||
"axios": "^1.7.2",
|
||||
"memoize": "^10.0.0",
|
||||
"openai": "^4.29.2",
|
||||
"reflect-metadata": "^0.2.2"
|
||||
},
|
||||
@@ -3126,15 +3123,6 @@
|
||||
"tslib": "^2.4.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@types/axios": {
|
||||
"version": "0.14.0",
|
||||
"resolved": "https://registry.npmjs.org/@types/axios/-/axios-0.14.0.tgz",
|
||||
"integrity": "sha512-KqQnQbdYE54D7oa/UmYVMZKq7CO4l8DEENzOKc4aBRwxCXSlJXGz83flFx5L7AWrOQnmuN3kVsRdt+GZPPjiVQ==",
|
||||
"deprecated": "This is a stub types definition for axios (https://github.com/mzabriskie/axios). axios provides its own type definitions, so you don't need @types/axios installed!",
|
||||
"dependencies": {
|
||||
"axios": "*"
|
||||
}
|
||||
},
|
||||
"node_modules/@types/babel__core": {
|
||||
"version": "7.20.5",
|
||||
"resolved": "https://registry.npmjs.org/@types/babel__core/-/babel__core-7.20.5.tgz",
|
||||
@@ -3509,16 +3497,6 @@
|
||||
"resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz",
|
||||
"integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q=="
|
||||
},
|
||||
"node_modules/axios": {
|
||||
"version": "1.7.2",
|
||||
"resolved": "https://registry.npmjs.org/axios/-/axios-1.7.2.tgz",
|
||||
"integrity": "sha512-2A8QhOMrbomlDuiLeK9XibIBzuHeRcqqNOHp0Cyp5EoJ1IFDh+XZH3A6BkXtv0K4gFGCI0Y4BM7B1wOEi0Rmgw==",
|
||||
"dependencies": {
|
||||
"follow-redirects": "^1.15.6",
|
||||
"form-data": "^4.0.0",
|
||||
"proxy-from-env": "^1.1.0"
|
||||
}
|
||||
},
|
||||
"node_modules/babel-jest": {
|
||||
"version": "29.7.0",
|
||||
"resolved": "https://registry.npmjs.org/babel-jest/-/babel-jest-29.7.0.tgz",
|
||||
@@ -4500,25 +4478,6 @@
|
||||
"integrity": "sha512-36yxDn5H7OFZQla0/jFJmbIKTdZAQHngCedGxiMmpNfEZM0sdEeT+WczLQrjK6D7o2aiyLYDnkw0R3JK0Qv1RQ==",
|
||||
"dev": true
|
||||
},
|
||||
"node_modules/follow-redirects": {
|
||||
"version": "1.15.6",
|
||||
"resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.6.tgz",
|
||||
"integrity": "sha512-wWN62YITEaOpSK584EZXJafH1AGpO8RVgElfkuXbTOrPX4fIfOyEpW/CsiNd8JdYrAoOvafRTOEnvsO++qCqFA==",
|
||||
"funding": [
|
||||
{
|
||||
"type": "individual",
|
||||
"url": "https://github.com/sponsors/RubenVerborgh"
|
||||
}
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=4.0"
|
||||
},
|
||||
"peerDependenciesMeta": {
|
||||
"debug": {
|
||||
"optional": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"node_modules/form-data": {
|
||||
"version": "4.0.0",
|
||||
"resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.0.tgz",
|
||||
@@ -5942,20 +5901,6 @@
|
||||
"is-buffer": "~1.1.6"
|
||||
}
|
||||
},
|
||||
"node_modules/memoize": {
|
||||
"version": "10.0.0",
|
||||
"resolved": "https://registry.npmjs.org/memoize/-/memoize-10.0.0.tgz",
|
||||
"integrity": "sha512-H6cBLgsi6vMWOcCpvVCdFFnl3kerEXbrYh9q+lY6VXvQSmM6CkmV08VOwT+WE2tzIEqRPFfAq3fm4v/UIW6mSA==",
|
||||
"dependencies": {
|
||||
"mimic-function": "^5.0.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/sindresorhus/memoize?sponsor=1"
|
||||
}
|
||||
},
|
||||
"node_modules/merge-stream": {
|
||||
"version": "2.0.0",
|
||||
"resolved": "https://registry.npmjs.org/merge-stream/-/merge-stream-2.0.0.tgz",
|
||||
@@ -6003,17 +5948,6 @@
|
||||
"node": ">= 0.6"
|
||||
}
|
||||
},
|
||||
"node_modules/mimic-function": {
|
||||
"version": "5.0.1",
|
||||
"resolved": "https://registry.npmjs.org/mimic-function/-/mimic-function-5.0.1.tgz",
|
||||
"integrity": "sha512-VP79XUPxV2CigYP3jWwAUFSku2aKqBH7uTAapFWCBqutsbmDo96KY5o8uh6U+/YSIn5OxJnXp73beVkpqMIGhA==",
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/sindresorhus"
|
||||
}
|
||||
},
|
||||
"node_modules/minimatch": {
|
||||
"version": "3.1.2",
|
||||
"resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz",
|
||||
@@ -6425,11 +6359,6 @@
|
||||
"node": ">= 6"
|
||||
}
|
||||
},
|
||||
"node_modules/proxy-from-env": {
|
||||
"version": "1.1.0",
|
||||
"resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.1.0.tgz",
|
||||
"integrity": "sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg=="
|
||||
},
|
||||
"node_modules/punycode": {
|
||||
"version": "2.3.1",
|
||||
"resolved": "https://registry.npmjs.org/punycode/-/punycode-2.3.1.tgz",
|
||||
|
||||
@@ -65,9 +65,7 @@
|
||||
"version": "napi version"
|
||||
},
|
||||
"dependencies": {
|
||||
"@types/axios": "^0.14.0",
|
||||
"apache-arrow": "^15.0.0",
|
||||
"axios": "^1.7.2",
|
||||
"openai": "^4.29.2",
|
||||
"reflect-metadata": "^0.2.2"
|
||||
}
|
||||
|
||||
@@ -20,7 +20,6 @@ mod connection;
|
||||
mod error;
|
||||
mod index;
|
||||
mod iterator;
|
||||
pub mod merge;
|
||||
mod query;
|
||||
mod table;
|
||||
mod util;
|
||||
|
||||
@@ -1,53 +0,0 @@
|
||||
use lancedb::{arrow::IntoArrow, ipc::ipc_file_to_batches, table::merge::MergeInsertBuilder};
|
||||
use napi::bindgen_prelude::*;
|
||||
use napi_derive::napi;
|
||||
|
||||
#[napi]
|
||||
#[derive(Clone)]
|
||||
/// A builder used to create and run a merge insert operation
|
||||
pub struct NativeMergeInsertBuilder {
|
||||
pub(crate) inner: MergeInsertBuilder,
|
||||
}
|
||||
|
||||
#[napi]
|
||||
impl NativeMergeInsertBuilder {
|
||||
#[napi]
|
||||
pub fn when_matched_update_all(&self, condition: Option<String>) -> Self {
|
||||
let mut this = self.clone();
|
||||
this.inner.when_matched_update_all(condition);
|
||||
this
|
||||
}
|
||||
|
||||
#[napi]
|
||||
pub fn when_not_matched_insert_all(&self) -> Self {
|
||||
let mut this = self.clone();
|
||||
this.inner.when_not_matched_insert_all();
|
||||
this
|
||||
}
|
||||
#[napi]
|
||||
pub fn when_not_matched_by_source_delete(&self, filter: Option<String>) -> Self {
|
||||
let mut this = self.clone();
|
||||
this.inner.when_not_matched_by_source_delete(filter);
|
||||
this
|
||||
}
|
||||
|
||||
#[napi]
|
||||
pub async fn execute(&self, buf: Buffer) -> napi::Result<()> {
|
||||
let data = ipc_file_to_batches(buf.to_vec())
|
||||
.and_then(IntoArrow::into_arrow)
|
||||
.map_err(|e| napi::Error::from_reason(format!("Failed to read IPC file: {}", e)))?;
|
||||
|
||||
let this = self.clone();
|
||||
|
||||
this.inner
|
||||
.execute(data)
|
||||
.await
|
||||
.map_err(|e| napi::Error::from_reason(format!("Failed to execute merge insert: {}", e)))
|
||||
}
|
||||
}
|
||||
|
||||
impl From<MergeInsertBuilder> for NativeMergeInsertBuilder {
|
||||
fn from(inner: MergeInsertBuilder) -> Self {
|
||||
Self { inner }
|
||||
}
|
||||
}
|
||||
@@ -23,7 +23,6 @@ use napi_derive::napi;
|
||||
|
||||
use crate::error::NapiErrorExt;
|
||||
use crate::index::Index;
|
||||
use crate::merge::NativeMergeInsertBuilder;
|
||||
use crate::query::{Query, VectorQuery};
|
||||
|
||||
#[napi]
|
||||
@@ -329,12 +328,6 @@ impl Table {
|
||||
.map(IndexConfig::from)
|
||||
.collect::<Vec<_>>())
|
||||
}
|
||||
|
||||
#[napi]
|
||||
pub fn merge_insert(&self, on: Vec<String>) -> napi::Result<NativeMergeInsertBuilder> {
|
||||
let on: Vec<_> = on.iter().map(String::as_str).collect();
|
||||
Ok(self.inner_ref()?.merge_insert(on.as_slice()).into())
|
||||
}
|
||||
}
|
||||
|
||||
#[napi(object)]
|
||||
|
||||
@@ -3,7 +3,7 @@ name = "lancedb"
|
||||
# version in Cargo.toml
|
||||
dependencies = [
|
||||
"deprecation",
|
||||
"pylance==0.12.2-beta.2",
|
||||
"pylance==0.12.1",
|
||||
"ratelimiter~=1.0",
|
||||
"requests>=2.31.0",
|
||||
"retry>=0.9.2",
|
||||
@@ -57,10 +57,15 @@ tests = [
|
||||
"duckdb",
|
||||
"pytz",
|
||||
"polars>=0.19",
|
||||
"tantivy",
|
||||
"tantivy"
|
||||
]
|
||||
dev = ["ruff", "pre-commit"]
|
||||
docs = ["mkdocs", "mkdocs-jupyter", "mkdocs-material", "mkdocstrings[python]"]
|
||||
docs = [
|
||||
"mkdocs",
|
||||
"mkdocs-jupyter",
|
||||
"mkdocs-material",
|
||||
"mkdocstrings[python]",
|
||||
]
|
||||
clip = ["torch", "pillow", "open-clip"]
|
||||
embeddings = [
|
||||
"openai>=1.6.1",
|
||||
@@ -95,5 +100,5 @@ addopts = "--strict-markers --ignore-glob=lancedb/embeddings/*.py"
|
||||
markers = [
|
||||
"slow: marks tests as slow (deselect with '-m \"not slow\"')",
|
||||
"asyncio",
|
||||
"s3_test",
|
||||
"s3_test"
|
||||
]
|
||||
|
||||
@@ -337,6 +337,7 @@ class Table(ABC):
|
||||
For example, the following scan will be faster if the column ``my_col`` has
|
||||
a scalar index:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
import lancedb
|
||||
|
||||
@@ -347,6 +348,8 @@ class Table(ABC):
|
||||
Scalar indices can also speed up scans containing a vector search and a
|
||||
prefilter:
|
||||
|
||||
.. code-block::python
|
||||
|
||||
import lancedb
|
||||
|
||||
db = lancedb.connect("/data/lance")
|
||||
@@ -382,6 +385,7 @@ class Table(ABC):
|
||||
Examples
|
||||
--------
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
import lance
|
||||
|
||||
|
||||
@@ -1,2 +0,0 @@
|
||||
[toolchain]
|
||||
channel = "1.79.0"
|
||||
@@ -84,8 +84,7 @@ pub fn convert_polars_arrow_array_to_arrow_rs_array(
|
||||
arrow_datatype: arrow_schema::DataType,
|
||||
) -> std::result::Result<arrow_array::ArrayRef, arrow_schema::ArrowError> {
|
||||
let polars_c_array = polars_arrow::ffi::export_array_to_c(polars_array);
|
||||
// Safety: `polars_arrow::ffi::ArrowArray` has the same memory layout as `arrow::ffi::FFI_ArrowArray`.
|
||||
let arrow_c_array: arrow_data::ffi::FFI_ArrowArray = unsafe { mem::transmute(polars_c_array) };
|
||||
let arrow_c_array = unsafe { mem::transmute(polars_c_array) };
|
||||
Ok(arrow_array::make_array(unsafe {
|
||||
arrow::ffi::from_ffi_and_data_type(arrow_c_array, arrow_datatype)
|
||||
}?))
|
||||
@@ -97,8 +96,7 @@ fn convert_arrow_rs_array_to_polars_arrow_array(
|
||||
polars_arrow_dtype: polars::datatypes::ArrowDataType,
|
||||
) -> Result<Box<dyn polars_arrow::array::Array>> {
|
||||
let arrow_c_array = arrow::ffi::FFI_ArrowArray::new(&arrow_rs_array.to_data());
|
||||
// Safety: `polars_arrow::ffi::ArrowArray` has the same memory layout as `arrow::ffi::FFI_ArrowArray`.
|
||||
let polars_c_array: polars_arrow::ffi::ArrowArray = unsafe { mem::transmute(arrow_c_array) };
|
||||
let polars_c_array = unsafe { mem::transmute(arrow_c_array) };
|
||||
Ok(unsafe { polars_arrow::ffi::import_array_from_c(polars_c_array, polars_arrow_dtype) }?)
|
||||
}
|
||||
|
||||
@@ -106,9 +104,7 @@ fn convert_polars_arrow_field_to_arrow_rs_field(
|
||||
polars_arrow_field: polars_arrow::datatypes::Field,
|
||||
) -> Result<arrow_schema::Field> {
|
||||
let polars_c_schema = polars_arrow::ffi::export_field_to_c(&polars_arrow_field);
|
||||
// Safety: `polars_arrow::ffi::ArrowSchema` has the same memory layout as `arrow::ffi::FFI_ArrowSchema`.
|
||||
let arrow_c_schema: arrow::ffi::FFI_ArrowSchema =
|
||||
unsafe { mem::transmute::<_, _>(polars_c_schema) };
|
||||
let arrow_c_schema: arrow::ffi::FFI_ArrowSchema = unsafe { mem::transmute(polars_c_schema) };
|
||||
let arrow_rs_dtype = arrow_schema::DataType::try_from(&arrow_c_schema)?;
|
||||
Ok(arrow_schema::Field::new(
|
||||
polars_arrow_field.name,
|
||||
@@ -122,8 +118,6 @@ fn convert_arrow_rs_field_to_polars_arrow_field(
|
||||
) -> Result<polars_arrow::datatypes::Field> {
|
||||
let arrow_rs_dtype = arrow_rs_field.data_type();
|
||||
let arrow_c_schema = arrow::ffi::FFI_ArrowSchema::try_from(arrow_rs_dtype)?;
|
||||
// Safety: `polars_arrow::ffi::ArrowSchema` has the same memory layout as `arrow::ffi::FFI_ArrowSchema`.
|
||||
let polars_c_schema: polars_arrow::ffi::ArrowSchema =
|
||||
unsafe { mem::transmute::<_, _>(arrow_c_schema) };
|
||||
let polars_c_schema: polars_arrow::ffi::ArrowSchema = unsafe { mem::transmute(arrow_c_schema) };
|
||||
Ok(unsafe { polars_arrow::ffi::import_field_from_c(&polars_c_schema) }?)
|
||||
}
|
||||
|
||||
@@ -23,7 +23,6 @@ use super::TableInternal;
|
||||
/// A builder used to create and run a merge insert operation
|
||||
///
|
||||
/// See [`super::Table::merge_insert`] for more context
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct MergeInsertBuilder {
|
||||
table: Arc<dyn TableInternal>,
|
||||
pub(super) on: Vec<String>,
|
||||
|
||||
Reference in New Issue
Block a user