mirror of
https://github.com/lancedb/lancedb.git
synced 2026-01-07 12:22:59 +00:00
feat: {add|alter|drop}_columns APIs (#1015)
Initial work for #959. This exposes the basic functionality for each in all of the APIs. Will add user guide documentation in a later PR.
This commit is contained in:
@@ -42,7 +42,10 @@ const {
|
||||
tableCompactFiles,
|
||||
tableListIndices,
|
||||
tableIndexStats,
|
||||
tableSchema
|
||||
tableSchema,
|
||||
tableAddColumns,
|
||||
tableAlterColumns,
|
||||
tableDropColumns
|
||||
// eslint-disable-next-line @typescript-eslint/no-var-requires
|
||||
} = require('../native.js')
|
||||
|
||||
@@ -500,6 +503,59 @@ export interface Table<T = number[]> {
|
||||
filter(value: string): Query<T>
|
||||
|
||||
schema: Promise<Schema>
|
||||
|
||||
// TODO: Support BatchUDF
|
||||
/**
|
||||
* Add new columns with defined values.
|
||||
*
|
||||
* @param newColumnTransforms pairs of column names and the SQL expression to use
|
||||
* to calculate the value of the new column. These
|
||||
* expressions will be evaluated for each row in the
|
||||
* table, and can reference existing columns in the table.
|
||||
*/
|
||||
addColumns(newColumnTransforms: Array<{ name: string, valueSql: string }>): Promise<void>
|
||||
|
||||
/**
|
||||
* Alter the name or nullability of columns.
|
||||
*
|
||||
* @param columnAlterations One or more alterations to apply to columns.
|
||||
*/
|
||||
alterColumns(columnAlterations: ColumnAlteration[]): Promise<void>
|
||||
|
||||
/**
|
||||
* Drop one or more columns from the dataset.
|
||||
*
|
||||
* This is a metadata-only operation and does not remove the data from the
|
||||
* underlying storage. In order to remove the data, you must subsequently
|
||||
* call `compactFiles` to rewrite the data without the removed columns and
|
||||
* then call `cleanupOldVersions` to remove the old files.
|
||||
*
|
||||
* @param columnNames The names of the columns to drop. These can be nested
|
||||
* column references (e.g. "a.b.c") or top-level column
|
||||
* names (e.g. "a").
|
||||
*/
|
||||
dropColumns(columnNames: string[]): Promise<void>
|
||||
}
|
||||
|
||||
/**
|
||||
* A definition of a column alteration. The alteration changes the column at
|
||||
* `path` to have the new name given by `rename` and to be nullable if
|
||||
* `nullable` is true. At least one of `rename` or `nullable`
|
||||
* must be provided.
|
||||
*/
|
||||
export interface ColumnAlteration {
|
||||
/**
|
||||
* The path to the column to alter. This is a dot-separated path to the column.
|
||||
* If it is a top-level column then it is just the name of the column. If it is
|
||||
* a nested column then it is the path to the column, e.g. "a.b.c" for a column
|
||||
* `c` nested inside a column `b` nested inside a column `a`.
|
||||
*/
|
||||
path: string
|
||||
rename?: string
|
||||
/**
|
||||
* Set the new nullability. Note that a nullable column cannot be made non-nullable.
|
||||
*/
|
||||
nullable?: boolean
|
||||
}
|
||||
|
||||
export interface UpdateArgs {
|
||||
@@ -1028,6 +1084,18 @@ export class LocalTable<T = number[]> implements Table<T> {
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
async addColumns (newColumnTransforms: Array<{ name: string, valueSql: string }>): Promise<void> {
|
||||
return tableAddColumns.call(this._tbl, newColumnTransforms)
|
||||
}
|
||||
|
||||
async alterColumns (columnAlterations: ColumnAlteration[]): Promise<void> {
|
||||
return tableAlterColumns.call(this._tbl, columnAlterations)
|
||||
}
|
||||
|
||||
async dropColumns (columnNames: string[]): Promise<void> {
|
||||
return tableDropColumns.call(this._tbl, columnNames)
|
||||
}
|
||||
}
|
||||
|
||||
export interface CleanupStats {
|
||||
|
||||
@@ -25,7 +25,8 @@ import {
|
||||
type UpdateArgs,
|
||||
type UpdateSqlArgs,
|
||||
makeArrowTable,
|
||||
type MergeInsertArgs
|
||||
type MergeInsertArgs,
|
||||
type ColumnAlteration
|
||||
} from '../index'
|
||||
import { Query } from '../query'
|
||||
|
||||
@@ -474,4 +475,16 @@ export class RemoteTable<T = number[]> implements Table<T> {
|
||||
numUnindexedRows: results.data.num_unindexed_rows
|
||||
}
|
||||
}
|
||||
|
||||
async addColumns (newColumnTransforms: Array<{ name: string, valueSql: string }>): Promise<void> {
|
||||
throw new Error('Add columns is not yet supported in LanceDB Cloud.')
|
||||
}
|
||||
|
||||
async alterColumns (columnAlterations: ColumnAlteration[]): Promise<void> {
|
||||
throw new Error('Alter columns is not yet supported in LanceDB Cloud.')
|
||||
}
|
||||
|
||||
async dropColumns (columnNames: string[]): Promise<void> {
|
||||
throw new Error('Drop columns is not yet supported in LanceDB Cloud.')
|
||||
}
|
||||
}
|
||||
|
||||
@@ -37,8 +37,10 @@ import {
|
||||
Utf8,
|
||||
Table as ArrowTable,
|
||||
vectorFromArray,
|
||||
Float64,
|
||||
Float32,
|
||||
Float16
|
||||
Float16,
|
||||
Int64
|
||||
} from 'apache-arrow'
|
||||
|
||||
const expect = chai.expect
|
||||
@@ -1057,3 +1059,63 @@ describe('Compact and cleanup', function () {
|
||||
assert.equal(await table.countRows(), 3)
|
||||
})
|
||||
})
|
||||
|
||||
describe('schema evolution', function () {
|
||||
// Create a new sample table
|
||||
it('can add a new column to the schema', async function () {
|
||||
const dir = await track().mkdir('lancejs')
|
||||
const con = await lancedb.connect(dir)
|
||||
const table = await con.createTable('vectors', [
|
||||
{ id: 1n, vector: [0.1, 0.2] }
|
||||
])
|
||||
|
||||
await table.addColumns([{ name: 'price', valueSql: 'cast(10.0 as float)' }])
|
||||
|
||||
const expectedSchema = new Schema([
|
||||
new Field('id', new Int64()),
|
||||
new Field('vector', new FixedSizeList(2, new Field('item', new Float32(), true))),
|
||||
new Field('price', new Float32())
|
||||
])
|
||||
expect(await table.schema).to.deep.equal(expectedSchema)
|
||||
})
|
||||
|
||||
it('can alter the columns in the schema', async function () {
|
||||
const dir = await track().mkdir('lancejs')
|
||||
const con = await lancedb.connect(dir)
|
||||
const schema = new Schema([
|
||||
new Field('id', new Int64(), false),
|
||||
new Field('vector', new FixedSizeList(2, new Field('item', new Float32(), true))),
|
||||
new Field('price', new Float64(), false)
|
||||
])
|
||||
const table = await con.createTable('vectors', [
|
||||
{ id: 1n, vector: [0.1, 0.2], price: 10.0 }
|
||||
])
|
||||
expect(await table.schema).to.deep.equal(schema)
|
||||
|
||||
await table.alterColumns([
|
||||
{ path: 'id', rename: 'new_id' },
|
||||
{ path: 'price', nullable: true }
|
||||
])
|
||||
|
||||
const expectedSchema = new Schema([
|
||||
new Field('new_id', new Int64(), false),
|
||||
new Field('vector', new FixedSizeList(2, new Field('item', new Float32(), true))),
|
||||
new Field('price', new Float64(), true)
|
||||
])
|
||||
expect(await table.schema).to.deep.equal(expectedSchema)
|
||||
})
|
||||
|
||||
it('can drop a column from the schema', async function () {
|
||||
const dir = await track().mkdir('lancejs')
|
||||
const con = await lancedb.connect(dir)
|
||||
const table = await con.createTable('vectors', [
|
||||
{ id: 1n, vector: [0.1, 0.2] }
|
||||
])
|
||||
await table.dropColumns(['vector'])
|
||||
|
||||
const expectedSchema = new Schema([
|
||||
new Field('id', new Int64(), false)
|
||||
])
|
||||
expect(await table.schema).to.deep.equal(expectedSchema)
|
||||
})
|
||||
})
|
||||
|
||||
Reference in New Issue
Block a user