mirror of
https://github.com/lancedb/lancedb.git
synced 2026-05-28 17:30:42 +00:00
feat: page_token / limit to native table_names function. Use async table_names function from sync table_names function (#1059)
The synchronous table_names function in Python lancedb relies on Arrow's filesystem, which behaves slightly differently than object_store. As a result, the function would not work properly in GCS. However, the async table_names function uses object_store directly and thus is accurate. In most cases we can fall back to using the async table_names function, and so this PR does so. The one case where we cannot is when the user is already in an async context (we can't start a new async event loop). Soon, we can just redirect those users to use the async API instead of the sync API, and so that case will eventually go away. For now, we fall back to the old behavior in that case.
This commit is contained in:
@@ -457,8 +457,8 @@ describe("when using two versions of arrow", function () {
|
||||
expect(lhs.nullable).toEqual(rhs.nullable);
|
||||
expect(lhs.typeId).toEqual(rhs.typeId);
|
||||
if ("children" in lhs.type && lhs.type.children !== null) {
|
||||
const lhs_children = lhs.type.children as Field[];
|
||||
lhs_children.forEach((child: Field, idx) => {
|
||||
const lhsChildren = lhs.type.children as Field[];
|
||||
lhsChildren.forEach((child: Field, idx) => {
|
||||
compareFields(child, rhs.type.children[idx]);
|
||||
});
|
||||
}
|
||||
|
||||
@@ -66,9 +66,23 @@ describe("given a connection", () => {
|
||||
await expect(tbl.countRows()).resolves.toBe(1);
|
||||
});
|
||||
|
||||
it("should list tables", async () => {
|
||||
await db.createTable("test2", [{ id: 1 }, { id: 2 }]);
|
||||
await db.createTable("test1", [{ id: 1 }, { id: 2 }]);
|
||||
expect(await db.tableNames()).toEqual(["test1", "test2"]);
|
||||
it("should respect limit and page token when listing tables", async () => {
|
||||
const db = await connect(tmpDir.name);
|
||||
|
||||
await db.createTable("b", [{ id: 1 }]);
|
||||
await db.createTable("a", [{ id: 1 }]);
|
||||
await db.createTable("c", [{ id: 1 }]);
|
||||
|
||||
let tables = await db.tableNames();
|
||||
expect(tables).toEqual(["a", "b", "c"]);
|
||||
|
||||
tables = await db.tableNames({ limit: 1 });
|
||||
expect(tables).toEqual(["a"]);
|
||||
|
||||
tables = await db.tableNames({ limit: 1, startAfter: "a" });
|
||||
expect(tables).toEqual(["b"]);
|
||||
|
||||
tables = await db.tableNames({ startAfter: "a" });
|
||||
expect(tables).toEqual(["b", "c"]);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -103,12 +103,12 @@ describe("Test creating index", () => {
|
||||
// TODO: check index type.
|
||||
|
||||
// Search without specifying the column
|
||||
const query_vector = data.toArray()[5].vec.toJSON();
|
||||
const rst = await tbl.query().nearestTo(query_vector).limit(2).toArrow();
|
||||
const queryVector = data.toArray()[5].vec.toJSON();
|
||||
const rst = await tbl.query().nearestTo(queryVector).limit(2).toArrow();
|
||||
expect(rst.numRows).toBe(2);
|
||||
|
||||
// Search with specifying the column
|
||||
const rst2 = await tbl.search(query_vector, "vec").limit(2).toArrow();
|
||||
const rst2 = await tbl.search(queryVector, "vec").limit(2).toArrow();
|
||||
expect(rst2.numRows).toBe(2);
|
||||
expect(rst.toString()).toEqual(rst2.toString());
|
||||
});
|
||||
@@ -169,6 +169,7 @@ describe("Test creating index", () => {
|
||||
);
|
||||
tbl
|
||||
.createIndex("vec")
|
||||
// eslint-disable-next-line @typescript-eslint/naming-convention
|
||||
.ivf_pq({ num_partitions: 2, num_sub_vectors: 2 })
|
||||
.build();
|
||||
|
||||
@@ -199,10 +200,10 @@ describe("Test creating index", () => {
|
||||
const query64 = Array(64)
|
||||
.fill(1)
|
||||
.map(() => Math.random());
|
||||
const rst64_1 = await tbl.query().nearestTo(query64).limit(2).toArrow();
|
||||
const rst64_2 = await tbl.search(query64, "vec2").limit(2).toArrow();
|
||||
expect(rst64_1.toString()).toEqual(rst64_2.toString());
|
||||
expect(rst64_1.numRows).toBe(2);
|
||||
const rst64Query = await tbl.query().nearestTo(query64).limit(2).toArrow();
|
||||
const rst64Search = await tbl.search(query64, "vec2").limit(2).toArrow();
|
||||
expect(rst64Query.toString()).toEqual(rst64Search.toString());
|
||||
expect(rst64Query.numRows).toBe(2);
|
||||
});
|
||||
|
||||
test("create scalar index", async () => {
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
/* eslint-disable @typescript-eslint/naming-convention */
|
||||
// @ts-check
|
||||
|
||||
const eslint = require("@eslint/js");
|
||||
@@ -8,4 +9,9 @@ module.exports = tseslint.config(
|
||||
eslint.configs.recommended,
|
||||
eslintConfigPrettier,
|
||||
...tseslint.configs.recommended,
|
||||
{
|
||||
rules: {
|
||||
"@typescript-eslint/naming-convention": "error",
|
||||
},
|
||||
},
|
||||
);
|
||||
|
||||
@@ -35,6 +35,19 @@ export interface CreateTableOptions {
|
||||
existOk: boolean;
|
||||
}
|
||||
|
||||
export interface TableNamesOptions {
|
||||
/**
|
||||
* If present, only return names that come lexicographically after the
|
||||
* supplied value.
|
||||
*
|
||||
* This can be combined with limit to implement pagination by setting this to
|
||||
* the last table name from the previous page.
|
||||
*/
|
||||
startAfter?: string;
|
||||
/** An optional limit to the number of results to return. */
|
||||
limit?: number;
|
||||
}
|
||||
|
||||
/**
|
||||
* A LanceDB Connection that allows you to open tables and create new ones.
|
||||
*
|
||||
@@ -80,9 +93,14 @@ export class Connection {
|
||||
return this.inner.display();
|
||||
}
|
||||
|
||||
/** List all the table names in this database. */
|
||||
async tableNames(): Promise<string[]> {
|
||||
return this.inner.tableNames();
|
||||
/** List all the table names in this database.
|
||||
*
|
||||
* Tables will be returned in lexicographical order.
|
||||
*
|
||||
* @param options Optional parameters to control the listing.
|
||||
*/
|
||||
async tableNames(options?: Partial<TableNamesOptions>): Promise<string[]> {
|
||||
return this.inner.tableNames(options?.startAfter, options?.limit);
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -27,6 +27,7 @@ export class OpenAIEmbeddingFunction implements EmbeddingFunction<string> {
|
||||
/**
|
||||
* @type {import("openai").default}
|
||||
*/
|
||||
// eslint-disable-next-line @typescript-eslint/naming-convention
|
||||
let Openai;
|
||||
try {
|
||||
// eslint-disable-next-line @typescript-eslint/no-var-requires
|
||||
|
||||
@@ -12,6 +12,9 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// TODO: Re-enable this as part of https://github.com/lancedb/lancedb/pull/1052
|
||||
/* eslint-disable @typescript-eslint/naming-convention */
|
||||
|
||||
import {
|
||||
MetricType,
|
||||
IndexBuilder as NativeBuilder,
|
||||
|
||||
2
nodejs/lancedb/native.d.ts
vendored
2
nodejs/lancedb/native.d.ts
vendored
@@ -78,7 +78,7 @@ export class Connection {
|
||||
isOpen(): boolean
|
||||
close(): void
|
||||
/** List all tables in the dataset. */
|
||||
tableNames(): Promise<Array<string>>
|
||||
tableNames(startAfter?: string | undefined | null, limit?: number | undefined | null): Promise<Array<string>>
|
||||
/**
|
||||
* Create table from a Apache Arrow IPC (file) buffer.
|
||||
*
|
||||
|
||||
@@ -20,7 +20,7 @@ import {
|
||||
} from "./native";
|
||||
|
||||
class RecordBatchIterator implements AsyncIterator<RecordBatch> {
|
||||
private promised_inner?: Promise<NativeBatchIterator>;
|
||||
private promisedInner?: Promise<NativeBatchIterator>;
|
||||
private inner?: NativeBatchIterator;
|
||||
|
||||
constructor(
|
||||
@@ -29,13 +29,13 @@ class RecordBatchIterator implements AsyncIterator<RecordBatch> {
|
||||
) {
|
||||
// TODO: check promise reliably so we dont need to pass two arguments.
|
||||
this.inner = inner;
|
||||
this.promised_inner = promise;
|
||||
this.promisedInner = promise;
|
||||
}
|
||||
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
async next(): Promise<IteratorResult<RecordBatch<any>>> {
|
||||
if (this.inner === undefined) {
|
||||
this.inner = await this.promised_inner;
|
||||
this.inner = await this.promisedInner;
|
||||
}
|
||||
if (this.inner === undefined) {
|
||||
throw new Error("Invalid iterator state state");
|
||||
@@ -115,8 +115,8 @@ export class Query implements AsyncIterable<RecordBatch> {
|
||||
/**
|
||||
* Set the refine factor for the query.
|
||||
*/
|
||||
refineFactor(refine_factor: number): Query {
|
||||
this.inner.refineFactor(refine_factor);
|
||||
refineFactor(refineFactor: number): Query {
|
||||
this.inner.refineFactor(refineFactor);
|
||||
return this;
|
||||
}
|
||||
|
||||
|
||||
@@ -168,6 +168,7 @@ function sanitizeTimestamp(typeLike: object) {
|
||||
|
||||
function sanitizeTypedTimestamp(
|
||||
typeLike: object,
|
||||
// eslint-disable-next-line @typescript-eslint/naming-convention
|
||||
Datatype:
|
||||
| typeof TimestampNanosecond
|
||||
| typeof TimestampMicrosecond
|
||||
@@ -235,6 +236,7 @@ function sanitizeUnion(typeLike: object) {
|
||||
|
||||
function sanitizeTypedUnion(
|
||||
typeLike: object,
|
||||
// eslint-disable-next-line @typescript-eslint/naming-convention
|
||||
UnionType: typeof DenseUnion | typeof SparseUnion,
|
||||
) {
|
||||
if (!("typeIds" in typeLike)) {
|
||||
|
||||
@@ -89,9 +89,19 @@ impl Connection {
|
||||
|
||||
/// List all tables in the dataset.
|
||||
#[napi]
|
||||
pub async fn table_names(&self) -> napi::Result<Vec<String>> {
|
||||
self.get_inner()?
|
||||
.table_names()
|
||||
pub async fn table_names(
|
||||
&self,
|
||||
start_after: Option<String>,
|
||||
limit: Option<u32>,
|
||||
) -> napi::Result<Vec<String>> {
|
||||
let mut op = self.get_inner()?.table_names();
|
||||
if let Some(start_after) = start_after {
|
||||
op = op.start_after(start_after);
|
||||
}
|
||||
if let Some(limit) = limit {
|
||||
op = op.limit(limit);
|
||||
}
|
||||
op.execute()
|
||||
.await
|
||||
.map_err(|e| napi::Error::from_reason(format!("{}", e)))
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user