feat: add flag to enable faster manifest paths (#1612)

The new V2 manifest path scheme makes discovering the latest version of
a table constant time on object stores, regardless of the number of
versions in the table. See benchmarks in the PR here:
https://github.com/lancedb/lance/pull/2798

Closes #1583
This commit is contained in:
Will Jones
2024-09-09 11:34:36 -07:00
committed by GitHub
parent 029b01bbbf
commit 2a6586d6fb
16 changed files with 292 additions and 2 deletions

View File

@@ -12,9 +12,11 @@
// See the License for the specific language governing permissions and
// limitations under the License.
import { readdirSync } from "fs";
import { Field, Float64, Schema } from "apache-arrow";
import * as tmp from "tmp";
import { Connection, Table, connect } from "../lancedb";
import { LocalTable } from "../lancedb/table";
describe("when connecting", () => {
let tmpDir: tmp.DirResult;
@@ -134,4 +136,57 @@ describe("given a connection", () => {
await table.add(data);
await expect(isV2(table)).resolves.toBe(true);
});
it("should be able to create tables with V2 manifest paths", async () => {
const db = await connect(tmpDir.name);
let table = (await db.createEmptyTable(
"test_manifest_paths_v2_empty",
new Schema([new Field("id", new Float64(), true)]),
{
enableV2ManifestPaths: true,
},
)) as LocalTable;
expect(await table.usesV2ManifestPaths()).toBe(true);
let manifestDir =
tmpDir.name + "/test_manifest_paths_v2_empty.lance/_versions";
readdirSync(manifestDir).forEach((file) => {
expect(file).toMatch(/^\d{20}\.manifest$/);
});
table = (await db.createTable("test_manifest_paths_v2", [{ id: 1 }], {
enableV2ManifestPaths: true,
})) as LocalTable;
expect(await table.usesV2ManifestPaths()).toBe(true);
manifestDir = tmpDir.name + "/test_manifest_paths_v2.lance/_versions";
readdirSync(manifestDir).forEach((file) => {
expect(file).toMatch(/^\d{20}\.manifest$/);
});
});
it("should be able to migrate tables to the V2 manifest paths", async () => {
const db = await connect(tmpDir.name);
const table = (await db.createEmptyTable(
"test_manifest_path_migration",
new Schema([new Field("id", new Float64(), true)]),
{
enableV2ManifestPaths: false,
},
)) as LocalTable;
expect(await table.usesV2ManifestPaths()).toBe(false);
const manifestDir =
tmpDir.name + "/test_manifest_path_migration.lance/_versions";
readdirSync(manifestDir).forEach((file) => {
expect(file).toMatch(/^\d\.manifest$/);
});
await table.migrateManifestPathsV2();
expect(await table.usesV2ManifestPaths()).toBe(true);
readdirSync(manifestDir).forEach((file) => {
expect(file).toMatch(/^\d{20}\.manifest$/);
});
});
});