mirror of
https://github.com/lancedb/lancedb.git
synced 2026-01-06 03:42:57 +00:00
feat: dynamodb commit store support (#1410)
This allows users to specify URIs like: ``` s3+ddb://my_bucket/path?ddbTableName=myCommitTable ``` and it will support concurrent writes in S3. * [x] Add dynamodb integration tests * [x] Add modifications to get it working in Python sync API * [x] Added section in documentation describing how to configure. Closes #534 --------- Co-authored-by: universalmind303 <cory.grinstead@gmail.com>
This commit is contained in:
@@ -14,6 +14,11 @@
|
||||
|
||||
/* eslint-disable @typescript-eslint/naming-convention */
|
||||
|
||||
import {
|
||||
CreateTableCommand,
|
||||
DeleteTableCommand,
|
||||
DynamoDBClient,
|
||||
} from "@aws-sdk/client-dynamodb";
|
||||
import {
|
||||
CreateKeyCommand,
|
||||
KMSClient,
|
||||
@@ -38,6 +43,7 @@ const CONFIG = {
|
||||
awsAccessKeyId: "ACCESSKEY",
|
||||
awsSecretAccessKey: "SECRETKEY",
|
||||
awsEndpoint: "http://127.0.0.1:4566",
|
||||
dynamodbEndpoint: "http://127.0.0.1:4566",
|
||||
awsRegion: "us-east-1",
|
||||
};
|
||||
|
||||
@@ -66,7 +72,6 @@ class S3Bucket {
|
||||
} catch {
|
||||
// It's fine if the bucket doesn't exist
|
||||
}
|
||||
// biome-ignore lint/style/useNamingConvention: we dont control s3's api
|
||||
await client.send(new CreateBucketCommand({ Bucket: name }));
|
||||
return new S3Bucket(name);
|
||||
}
|
||||
@@ -79,32 +84,27 @@ class S3Bucket {
|
||||
static async deleteBucket(client: S3Client, name: string) {
|
||||
// Must delete all objects before we can delete the bucket
|
||||
const objects = await client.send(
|
||||
// biome-ignore lint/style/useNamingConvention: we dont control s3's api
|
||||
new ListObjectsV2Command({ Bucket: name }),
|
||||
);
|
||||
if (objects.Contents) {
|
||||
for (const object of objects.Contents) {
|
||||
await client.send(
|
||||
// biome-ignore lint/style/useNamingConvention: we dont control s3's api
|
||||
new DeleteObjectCommand({ Bucket: name, Key: object.Key }),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// biome-ignore lint/style/useNamingConvention: we dont control s3's api
|
||||
await client.send(new DeleteBucketCommand({ Bucket: name }));
|
||||
}
|
||||
|
||||
public async assertAllEncrypted(path: string, keyId: string) {
|
||||
const client = S3Bucket.s3Client();
|
||||
const objects = await client.send(
|
||||
// biome-ignore lint/style/useNamingConvention: we dont control s3's api
|
||||
new ListObjectsV2Command({ Bucket: this.name, Prefix: path }),
|
||||
);
|
||||
if (objects.Contents) {
|
||||
for (const object of objects.Contents) {
|
||||
const metadata = await client.send(
|
||||
// biome-ignore lint/style/useNamingConvention: we dont control s3's api
|
||||
new HeadObjectCommand({ Bucket: this.name, Key: object.Key }),
|
||||
);
|
||||
expect(metadata.ServerSideEncryption).toBe("aws:kms");
|
||||
@@ -143,7 +143,6 @@ class KmsKey {
|
||||
|
||||
public async delete() {
|
||||
const client = KmsKey.kmsClient();
|
||||
// biome-ignore lint/style/useNamingConvention: we dont control s3's api
|
||||
await client.send(new ScheduleKeyDeletionCommand({ KeyId: this.keyId }));
|
||||
}
|
||||
}
|
||||
@@ -224,3 +223,91 @@ maybeDescribe("storage_options", () => {
|
||||
await bucket.assertAllEncrypted("test/table2.lance", kmsKey.keyId);
|
||||
});
|
||||
});
|
||||
|
||||
class DynamoDBCommitTable {
|
||||
name: string;
|
||||
constructor(name: string) {
|
||||
this.name = name;
|
||||
}
|
||||
|
||||
static dynamoClient() {
|
||||
return new DynamoDBClient({
|
||||
region: CONFIG.awsRegion,
|
||||
credentials: {
|
||||
accessKeyId: CONFIG.awsAccessKeyId,
|
||||
secretAccessKey: CONFIG.awsSecretAccessKey,
|
||||
},
|
||||
endpoint: CONFIG.awsEndpoint,
|
||||
});
|
||||
}
|
||||
|
||||
public static async create(name: string): Promise<DynamoDBCommitTable> {
|
||||
const client = DynamoDBCommitTable.dynamoClient();
|
||||
const command = new CreateTableCommand({
|
||||
TableName: name,
|
||||
AttributeDefinitions: [
|
||||
{
|
||||
AttributeName: "base_uri",
|
||||
AttributeType: "S",
|
||||
},
|
||||
{
|
||||
AttributeName: "version",
|
||||
AttributeType: "N",
|
||||
},
|
||||
],
|
||||
KeySchema: [
|
||||
{ AttributeName: "base_uri", KeyType: "HASH" },
|
||||
{ AttributeName: "version", KeyType: "RANGE" },
|
||||
],
|
||||
ProvisionedThroughput: {
|
||||
ReadCapacityUnits: 1,
|
||||
WriteCapacityUnits: 1,
|
||||
},
|
||||
});
|
||||
await client.send(command);
|
||||
return new DynamoDBCommitTable(name);
|
||||
}
|
||||
|
||||
public async delete() {
|
||||
const client = DynamoDBCommitTable.dynamoClient();
|
||||
await client.send(new DeleteTableCommand({ TableName: this.name }));
|
||||
}
|
||||
}
|
||||
|
||||
maybeDescribe("DynamoDB Lock", () => {
|
||||
let bucket: S3Bucket;
|
||||
let commitTable: DynamoDBCommitTable;
|
||||
|
||||
beforeAll(async () => {
|
||||
bucket = await S3Bucket.create("lancedb2");
|
||||
commitTable = await DynamoDBCommitTable.create("commitTable");
|
||||
});
|
||||
|
||||
afterAll(async () => {
|
||||
await commitTable.delete();
|
||||
await bucket.delete();
|
||||
});
|
||||
|
||||
it("can be used to configure a DynamoDB table for commit log", async () => {
|
||||
const uri = `s3+ddb://${bucket.name}/test?ddbTableName=${commitTable.name}`;
|
||||
const db = await connect(uri, {
|
||||
storageOptions: CONFIG,
|
||||
readConsistencyInterval: 0,
|
||||
});
|
||||
|
||||
const table = await db.createTable("test", [{ a: 1, b: 2 }]);
|
||||
|
||||
// 5 concurrent appends
|
||||
const futs = Array.from({ length: 5 }, async () => {
|
||||
// Open a table so each append has a separate table reference. Otherwise
|
||||
// they will share the same table reference and the internal ReadWriteLock
|
||||
// will prevent any real concurrency.
|
||||
const table = await db.openTable("test");
|
||||
await table.add([{ a: 2, b: 3 }]);
|
||||
});
|
||||
await Promise.all(futs);
|
||||
|
||||
const rowCount = await table.countRows();
|
||||
expect(rowCount).toBe(6);
|
||||
});
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user