mirror of
https://github.com/lancedb/lancedb.git
synced 2026-05-19 21:10:41 +00:00
feat: dynamodb commit store support (#1410)
This allows users to specify URIs like:

```
s3+ddb://my_bucket/path?ddbTableName=myCommitTable
```

and it will support concurrent writes in S3.

* [x] Add dynamodb integration tests
* [x] Add modifications to get it working in Python sync API
* [x] Added section in documentation describing how to configure.

Closes #534

---------

Co-authored-by: universalmind303 <cory.grinstead@gmail.com>
This commit is contained in:
@@ -14,6 +14,11 @@
|
||||
|
||||
/* eslint-disable @typescript-eslint/naming-convention */
|
||||
|
||||
import {
|
||||
CreateTableCommand,
|
||||
DeleteTableCommand,
|
||||
DynamoDBClient,
|
||||
} from "@aws-sdk/client-dynamodb";
|
||||
import {
|
||||
CreateKeyCommand,
|
||||
KMSClient,
|
||||
@@ -38,6 +43,7 @@ const CONFIG = {
|
||||
awsAccessKeyId: "ACCESSKEY",
|
||||
awsSecretAccessKey: "SECRETKEY",
|
||||
awsEndpoint: "http://127.0.0.1:4566",
|
||||
dynamodbEndpoint: "http://127.0.0.1:4566",
|
||||
awsRegion: "us-east-1",
|
||||
};
|
||||
|
||||
@@ -66,7 +72,6 @@ class S3Bucket {
|
||||
} catch {
|
||||
// It's fine if the bucket doesn't exist
|
||||
}
|
||||
// biome-ignore lint/style/useNamingConvention: we dont control s3's api
|
||||
await client.send(new CreateBucketCommand({ Bucket: name }));
|
||||
return new S3Bucket(name);
|
||||
}
|
||||
@@ -79,32 +84,27 @@ class S3Bucket {
|
||||
/**
 * Empties the bucket `name` and then deletes it.
 *
 * @param client - S3 client used for every request.
 * @param name - Name of the bucket to remove.
 */
static async deleteBucket(client: S3Client, name: string) {
  // Must delete all objects before we can delete the bucket. A single
  // listing only returns one page of keys, so keep listing from the start
  // (the previous page has just been deleted) until the response is no
  // longer truncated.
  let truncated = true;
  while (truncated) {
    const page = await client.send(new ListObjectsV2Command({ Bucket: name }));
    for (const object of page.Contents ?? []) {
      await client.send(
        new DeleteObjectCommand({ Bucket: name, Key: object.Key }),
      );
    }
    truncated = page.IsTruncated === true;
  }

  await client.send(new DeleteBucketCommand({ Bucket: name }));
}
|
||||
|
||||
public async assertAllEncrypted(path: string, keyId: string) {
|
||||
const client = S3Bucket.s3Client();
|
||||
const objects = await client.send(
|
||||
// biome-ignore lint/style/useNamingConvention: we dont control s3's api
|
||||
new ListObjectsV2Command({ Bucket: this.name, Prefix: path }),
|
||||
);
|
||||
if (objects.Contents) {
|
||||
for (const object of objects.Contents) {
|
||||
const metadata = await client.send(
|
||||
// biome-ignore lint/style/useNamingConvention: we dont control s3's api
|
||||
new HeadObjectCommand({ Bucket: this.name, Key: object.Key }),
|
||||
);
|
||||
expect(metadata.ServerSideEncryption).toBe("aws:kms");
|
||||
@@ -143,7 +143,6 @@ class KmsKey {
|
||||
|
||||
/** Sends a `ScheduleKeyDeletionCommand` for this key's `keyId`. */
public async delete() {
  const kms = KmsKey.kmsClient();
  const request = new ScheduleKeyDeletionCommand({ KeyId: this.keyId });
  await kms.send(request);
}
|
||||
}
|
||||
@@ -224,3 +223,91 @@ maybeDescribe("storage_options", () => {
|
||||
await bucket.assertAllEncrypted("test/table2.lance", kmsKey.keyId);
|
||||
});
|
||||
});
|
||||
|
||||
/**
 * Test helper wrapping the DynamoDB table that the `s3+ddb://` test passes
 * as `ddbTableName`.
 */
class DynamoDBCommitTable {
  name: string;

  constructor(name: string) {
    this.name = name;
  }

  /** Builds a DynamoDB client from the shared test `CONFIG`. */
  static dynamoClient() {
    return new DynamoDBClient({
      region: CONFIG.awsRegion,
      credentials: {
        accessKeyId: CONFIG.awsAccessKeyId,
        secretAccessKey: CONFIG.awsSecretAccessKey,
      },
      // Use the DynamoDB-specific endpoint so this helper and the connection
      // configured through `storageOptions: CONFIG` target the same service.
      endpoint: CONFIG.dynamodbEndpoint,
    });
  }

  /**
   * Creates the table `name`, keyed by `base_uri` (hash, string) and
   * `version` (range, number).
   *
   * @param name - Name of the table to create.
   * @returns A handle that can later be used to delete the table.
   */
  public static async create(name: string): Promise<DynamoDBCommitTable> {
    const client = DynamoDBCommitTable.dynamoClient();

    // Drop any table left behind by an earlier run that never reached its
    // cleanup, so the create call below starts from a clean slate.
    // NOTE(review): assumes the table is gone by the time the create request
    // is sent — confirm if this ever runs against a non-local endpoint.
    try {
      await client.send(new DeleteTableCommand({ TableName: name }));
    } catch {
      // It's fine if the table doesn't exist
    }

    const command = new CreateTableCommand({
      TableName: name,
      AttributeDefinitions: [
        { AttributeName: "base_uri", AttributeType: "S" },
        { AttributeName: "version", AttributeType: "N" },
      ],
      KeySchema: [
        { AttributeName: "base_uri", KeyType: "HASH" },
        { AttributeName: "version", KeyType: "RANGE" },
      ],
      ProvisionedThroughput: {
        ReadCapacityUnits: 1,
        WriteCapacityUnits: 1,
      },
    });
    await client.send(command);
    return new DynamoDBCommitTable(name);
  }

  /** Deletes the table this handle refers to. */
  public async delete() {
    const client = DynamoDBCommitTable.dynamoClient();
    await client.send(new DeleteTableCommand({ TableName: this.name }));
  }
}
|
||||
|
||||
// Checks that a connection opened with an `s3+ddb://` URI accepts several
// appends issued at the same time and ends up with every row.
maybeDescribe("DynamoDB Lock", () => {
  let bucket: S3Bucket;
  let commitTable: DynamoDBCommitTable;

  beforeAll(async () => {
    bucket = await S3Bucket.create("lancedb2");
    commitTable = await DynamoDBCommitTable.create("commitTable");
  });

  afterAll(async () => {
    await commitTable.delete();
    await bucket.delete();
  });

  it("can be used to configure a DynamoDB table for commit log", async () => {
    const db = await connect(
      `s3+ddb://${bucket.name}/test?ddbTableName=${commitTable.name}`,
      {
        storageOptions: CONFIG,
        readConsistencyInterval: 0,
      },
    );

    const table = await db.createTable("test", [{ a: 1, b: 2 }]);

    // Each append opens its own table handle. Sharing one handle would route
    // every write through the same internal ReadWriteLock and prevent any
    // real concurrency.
    const appendOnce = async () => {
      const handle = await db.openTable("test");
      await handle.add([{ a: 2, b: 3 }]);
    };

    // 5 concurrent appends
    const writers = 5;
    const pending: Promise<void>[] = [];
    for (let i = 0; i < writers; i++) {
      pending.push(appendOnce());
    }
    await Promise.all(pending);

    // One initial row plus one row per append.
    expect(await table.countRows()).toBe(1 + writers);
  });
});
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
{
|
||||
"$schema": "https://biomejs.dev/schemas/1.7.3/schema.json",
|
||||
"$schema": "https://biomejs.dev/schemas/1.8.3/schema.json",
|
||||
"organizeImports": {
|
||||
"enabled": true
|
||||
},
|
||||
@@ -100,6 +100,16 @@
|
||||
"globals": []
|
||||
},
|
||||
"overrides": [
|
||||
{
|
||||
"include": ["__test__/s3_integration.test.ts"],
|
||||
"linter": {
|
||||
"rules": {
|
||||
"style": {
|
||||
"useNamingConvention": "off"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"include": [
|
||||
"**/*.ts",
|
||||
|
||||
@@ -55,7 +55,7 @@ export class RestfulLanceDBClient {
|
||||
return axios.create({
|
||||
baseURL: this.url,
|
||||
headers: {
|
||||
// biome-ignore lint/style/useNamingConvention: external api
|
||||
// biome-ignore lint: external API
|
||||
Authorization: `Bearer ${this.#apiKey}`,
|
||||
},
|
||||
transformResponse: decodeErrorData,
|
||||
|
||||
1391
nodejs/package-lock.json
generated
1391
nodejs/package-lock.json
generated
File diff suppressed because it is too large
Load Diff
@@ -34,6 +34,7 @@
|
||||
"devDependencies": {
|
||||
"@aws-sdk/client-kms": "^3.33.0",
|
||||
"@aws-sdk/client-s3": "^3.33.0",
|
||||
"@aws-sdk/client-dynamodb": "^3.33.0",
|
||||
"@biomejs/biome": "^1.7.3",
|
||||
"@jest/globals": "^29.7.0",
|
||||
"@napi-rs/cli": "^2.18.0",
|
||||
@@ -68,7 +69,7 @@
|
||||
"lint-ci": "biome ci .",
|
||||
"docs": "typedoc --plugin typedoc-plugin-markdown --out ../docs/src/js lancedb/index.ts",
|
||||
"lint": "biome check . && biome format .",
|
||||
"lint-fix": "biome check --apply-unsafe . && biome format --write .",
|
||||
"lint-fix": "biome check --write . && biome format --write .",
|
||||
"prepublishOnly": "napi prepublish -t npm",
|
||||
"test": "jest --verbose",
|
||||
"integration": "S3_TEST=1 npm run test",
|
||||
|
||||
Reference in New Issue
Block a user