BREAKING CHANGE: Check if remote table exists when opening (with caching) (#1214)

- make open table behaviour consistent:
- opening a remote table now checks that the table exists by calling
/describe, and throws an error if the call doesn't succeed
- this is similar to the behaviour for local tables where we will raise
an exception when opening the table if the local dataset doesn't exist
- The table names are cached in the client with a TTL
- Also fixes a small bug: if the remote error response was deserialized
from JSON as an object, it was printed as-is, resulting in the
unhelpful error message: `Error: Server Error, status: 404, message: Not
Found: [object Object]`
This commit is contained in:
Bert
2024-04-10 11:54:47 -04:00
committed by GitHub
parent 8a1227030a
commit 25dea4e859
5 changed files with 76 additions and 13 deletions

View File

@@ -111,7 +111,11 @@ async function decodeErrorData(
if (responseType === 'arraybuffer') {
return new TextDecoder().decode(errorData)
} else {
return errorData
if (typeof errorData === 'object') {
return JSON.stringify(errorData)
}
return errorData
}
}

View File

@@ -38,7 +38,7 @@ import {
fromRecordsToStreamBuffer,
fromTableToStreamBuffer
} from '../arrow'
import { toSQL } from '../util'
import { toSQL, TTLCache } from '../util'
import { type HttpMiddleware } from '../middleware'
/**
@@ -47,6 +47,7 @@ import { type HttpMiddleware } from '../middleware'
export class RemoteConnection implements Connection {
private _client: HttpLancedbClient
private readonly _dbName: string
private readonly _tableCache = new TTLCache(300_000)
constructor (opts: ConnectionOptions) {
if (!opts.uri.startsWith('db://')) {
@@ -89,6 +90,9 @@ export class RemoteConnection implements Connection {
page_token: pageToken
})
const body = await response.body()
for (const table of body.tables) {
this._tableCache.set(table, true)
}
return body.tables
}
@@ -101,6 +105,12 @@ export class RemoteConnection implements Connection {
name: string,
embeddings?: EmbeddingFunction<T>
): Promise<Table<T>> {
// check if the table exists
if (this._tableCache.get(name) === undefined) {
await this._client.post(`/v1/table/${encodeURIComponent(name)}/describe/`)
this._tableCache.set(name, true)
}
if (embeddings !== undefined) {
return new RemoteTable(this._client, name, embeddings)
} else {
@@ -169,6 +179,7 @@ export class RemoteConnection implements Connection {
)
}
this._tableCache.set(tableName, true)
if (embeddings === undefined) {
return new RemoteTable(this._client, tableName)
} else {
@@ -178,6 +189,7 @@ export class RemoteConnection implements Connection {
async dropTable (name: string): Promise<void> {
await this._client.post(`/v1/table/${encodeURIComponent(name)}/drop/`)
this._tableCache.delete(name)
}
withMiddleware (middleware: HttpMiddleware): Connection {

View File

@@ -42,6 +42,7 @@ import {
Float16,
Int64
} from 'apache-arrow'
import type { RemoteRequest, RemoteResponse } from '../middleware'
const expect = chai.expect
const assert = chai.assert
@@ -913,7 +914,22 @@ describe('Remote LanceDB client', function () {
}
// Search
const table = await con.openTable('vectors')
const table = await con.withMiddleware(new (class {
async onRemoteRequest(req: RemoteRequest, next: (req: RemoteRequest) => Promise<RemoteResponse>) {
// intercept call to check if the table exists and make the call succeed
if (req.uri.endsWith('/describe/')) {
return {
status: 200,
statusText: 'OK',
headers: new Map(),
body: async () => ({})
}
}
return await next(req)
}
})()).openTable('vectors')
try {
await table.search([0.1, 0.3]).execute()
} catch (err) {

View File

@@ -42,3 +42,36 @@ export function toSQL (value: Literal): string {
// eslint-disable-next-line @typescript-eslint/restrict-template-expressions
throw new Error(`Unsupported value type: ${typeof value} value: (${value})`)
}
/**
 * A minimal in-memory, string-keyed cache whose entries expire a fixed time
 * after they were last written.
 *
 * Expired entries are evicted lazily: an entry is only removed when `get` is
 * called for its key after it has expired, or when `delete` is called. There
 * is no background sweep.
 *
 * @typeParam V Type of the cached values. Defaults to `any` so that existing
 * untyped usages (`new TTLCache(ttl)`) keep compiling unchanged.
 */
export class TTLCache<V = any> {
  private readonly cache: Map<string, { value: V, expires: number }>

  /**
   * @param ttl Time to live in milliseconds
   */
  constructor (private readonly ttl: number) {
    this.cache = new Map()
  }

  /**
   * Look up a cached value.
   *
   * @param key Key the value was stored under
   * @returns The cached value, or `undefined` if there is no entry for `key`
   * or the entry has expired
   */
  get (key: string): V | undefined {
    const entry = this.cache.get(key)
    if (entry === undefined) {
      return undefined
    }
    // An entry is expired as soon as the clock reaches its expiry timestamp,
    // so it is served for strictly less than `ttl` milliseconds and a TTL of
    // 0 never produces a hit.
    if (entry.expires <= Date.now()) {
      this.cache.delete(key)
      return undefined
    }
    return entry.value
  }

  /**
   * Store a value, replacing any existing entry for `key` and restarting its
   * time to live from now.
   *
   * @param key Key to store the value under
   * @param value Value to cache
   */
  set (key: string, value: V): void {
    this.cache.set(key, { value, expires: Date.now() + this.ttl })
  }

  /**
   * Remove the entry for `key`, if there is one.
   *
   * @param key Key of the entry to remove
   */
  delete (key: string): void {
    this.cache.delete(key)
  }
}