mirror of
https://github.com/lancedb/lancedb.git
synced 2025-12-24 22:09:58 +00:00
Compare commits
17 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
7732f7d41c | ||
|
|
5ca98c326f | ||
|
|
b55db397eb | ||
|
|
c04d72ac8a | ||
|
|
28b02fb72a | ||
|
|
f3cf986777 | ||
|
|
c73fcc8898 | ||
|
|
cd9debc3b7 | ||
|
|
26a97ba997 | ||
|
|
ce19fedb08 | ||
|
|
14e8e48de2 | ||
|
|
c30faf6083 | ||
|
|
64a4f025bb | ||
|
|
6dc968e7d3 | ||
|
|
06b5b69f1e | ||
|
|
6bd3a838fc | ||
|
|
f36fea8f20 |
@@ -1,5 +1,5 @@
|
||||
[bumpversion]
|
||||
current_version = 0.3.3
|
||||
current_version = 0.3.4
|
||||
commit = True
|
||||
message = Bump version: {current_version} → {new_version}
|
||||
tag = True
|
||||
|
||||
10
Cargo.toml
10
Cargo.toml
@@ -5,9 +5,9 @@ exclude = ["python"]
|
||||
resolver = "2"
|
||||
|
||||
[workspace.dependencies]
|
||||
lance = { "version" = "=0.8.6", "features" = ["dynamodb"] }
|
||||
lance-linalg = { "version" = "=0.8.6" }
|
||||
lance-testing = { "version" = "=0.8.6" }
|
||||
lance = { "version" = "=0.8.8", "features" = ["dynamodb"] }
|
||||
lance-linalg = { "version" = "=0.8.8" }
|
||||
lance-testing = { "version" = "=0.8.8" }
|
||||
# Note that this one does not include pyarrow
|
||||
arrow = { version = "47.0.0", optional = false }
|
||||
arrow-array = "47.0"
|
||||
@@ -18,8 +18,8 @@ arrow-schema = "47.0"
|
||||
arrow-arith = "47.0"
|
||||
arrow-cast = "47.0"
|
||||
chrono = "0.4.23"
|
||||
half = { "version" = "=2.2.1", default-features = false, features = [
|
||||
"num-traits"
|
||||
half = { "version" = "=2.3.1", default-features = false, features = [
|
||||
"num-traits",
|
||||
] }
|
||||
log = "0.4"
|
||||
object_store = "0.7.1"
|
||||
|
||||
@@ -73,12 +73,14 @@ nav:
|
||||
- Vector Search: search.md
|
||||
- SQL filters: sql.md
|
||||
- Indexing: ann_indexes.md
|
||||
- Versioning & Reproducibility: notebooks/reproducibility.ipynb
|
||||
- 🧬 Embeddings:
|
||||
- embeddings/index.md
|
||||
- Ingest Embedding Functions: embeddings/embedding_functions.md
|
||||
- Available Functions: embeddings/default_embedding_functions.md
|
||||
- Create Custom Embedding Functions: embeddings/api.md
|
||||
- Example- MultiModal CLIP Embeddings: notebooks/DisappearingEmbeddingFunction.ipynb
|
||||
- Example - Multi-lingual semantic search: notebooks/multi_lingual_example.ipynb
|
||||
- Example - MultiModal CLIP Embeddings: notebooks/DisappearingEmbeddingFunction.ipynb
|
||||
- 🔍 Python full-text search: fts.md
|
||||
- 🔌 Integrations:
|
||||
- integrations/index.md
|
||||
@@ -110,12 +112,14 @@ nav:
|
||||
- Vector Search: search.md
|
||||
- SQL filters: sql.md
|
||||
- Indexing: ann_indexes.md
|
||||
- Versioning & Reproducibility: notebooks/reproducibility.ipynb
|
||||
- Embeddings:
|
||||
- embeddings/index.md
|
||||
- Ingest Embedding Functions: embeddings/embedding_functions.md
|
||||
- Available Functions: embeddings/default_embedding_functions.md
|
||||
- Create Custom Embedding Functions: embeddings/api.md
|
||||
- Example- MultiModal CLIP Embeddings: notebooks/DisappearingEmbeddingFunction.ipynb
|
||||
- Example - Multi-lingual semantic search: notebooks/multi_lingual_example.ipynb
|
||||
- Example - MultiModal CLIP Embeddings: notebooks/DisappearingEmbeddingFunction.ipynb
|
||||
- Python full-text search: fts.md
|
||||
- Integrations:
|
||||
- integrations/index.md
|
||||
|
||||
@@ -1,5 +1,13 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "88c1af18",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Example - MultiModal CLIP Embeddings"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c6b5d346-2c2a-4341-a132-00e53543f8d1",
|
||||
|
||||
604
docs/src/notebooks/multi_lingual_example.ipynb
Normal file
604
docs/src/notebooks/multi_lingual_example.ipynb
Normal file
File diff suppressed because one or more lines are too long
1189
docs/src/notebooks/reproducibility.ipynb
Normal file
1189
docs/src/notebooks/reproducibility.ipynb
Normal file
File diff suppressed because it is too large
Load Diff
74
node/package-lock.json
generated
74
node/package-lock.json
generated
@@ -1,12 +1,12 @@
|
||||
{
|
||||
"name": "vectordb",
|
||||
"version": "0.3.2",
|
||||
"version": "0.3.3",
|
||||
"lockfileVersion": 2,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "vectordb",
|
||||
"version": "0.3.2",
|
||||
"version": "0.3.3",
|
||||
"cpu": [
|
||||
"x64",
|
||||
"arm64"
|
||||
@@ -53,11 +53,11 @@
|
||||
"uuid": "^9.0.0"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"@lancedb/vectordb-darwin-arm64": "0.3.2",
|
||||
"@lancedb/vectordb-darwin-x64": "0.3.2",
|
||||
"@lancedb/vectordb-linux-arm64-gnu": "0.3.2",
|
||||
"@lancedb/vectordb-linux-x64-gnu": "0.3.2",
|
||||
"@lancedb/vectordb-win32-x64-msvc": "0.3.2"
|
||||
"@lancedb/vectordb-darwin-arm64": "0.3.3",
|
||||
"@lancedb/vectordb-darwin-x64": "0.3.3",
|
||||
"@lancedb/vectordb-linux-arm64-gnu": "0.3.3",
|
||||
"@lancedb/vectordb-linux-x64-gnu": "0.3.3",
|
||||
"@lancedb/vectordb-win32-x64-msvc": "0.3.3"
|
||||
}
|
||||
},
|
||||
"node_modules/@apache-arrow/ts": {
|
||||
@@ -317,9 +317,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@lancedb/vectordb-darwin-arm64": {
|
||||
"version": "0.3.2",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.3.2.tgz",
|
||||
"integrity": "sha512-CDh+sU2k4xVfWauwDZnybma8AJ+Q2i0SzHg05BwgDcani7I0k60NjJ5GobpgQ38xOiEmwHllES1xs4NRh+1YkA==",
|
||||
"version": "0.3.3",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.3.3.tgz",
|
||||
"integrity": "sha512-nvyj7xNX2/wb/PH5TjyhLR/NQ1jVuoBw2B5UaSg7qf8Tnm5SSXWQ7F25RVKcKwh72fz1qB+CWW24ftZnRzbT/Q==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
@@ -329,9 +329,9 @@
|
||||
]
|
||||
},
|
||||
"node_modules/@lancedb/vectordb-darwin-x64": {
|
||||
"version": "0.3.2",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.3.2.tgz",
|
||||
"integrity": "sha512-xevyA+M/UE8ttaNkx68AyIUKlyWMhIzOECx0hbyN1zfShJe2UcunQcmbM1NxUi7EywodByyiP7bfMI1ZR1Y4Mw==",
|
||||
"version": "0.3.3",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.3.3.tgz",
|
||||
"integrity": "sha512-7CW+nILyPHp6cua0Rl0xaTDWw/vajEn/jCsEjFYgDmE+rtf5Z5Fum41FxR9C2TtIAvUK+nWb5mkYeOLqU6vRvg==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
@@ -341,9 +341,9 @@
|
||||
]
|
||||
},
|
||||
"node_modules/@lancedb/vectordb-linux-arm64-gnu": {
|
||||
"version": "0.3.2",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.3.2.tgz",
|
||||
"integrity": "sha512-mSKkQ/p6UTSLwWzfZMBS7wA6Gf335KljXLaOhdT4TUI/jC6e9/cvZKkXRgpdE9/gvfl4/WVzKY7sg3+azDYQ+A==",
|
||||
"version": "0.3.3",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.3.3.tgz",
|
||||
"integrity": "sha512-MmhwbacKxZPkLwwOqysVY8mUb8lFoyFIPlYhSLV4xS1C8X4HWALljIul1qMl1RYudp9Uc3PsOzRexl+OvCGfUw==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
@@ -353,9 +353,9 @@
|
||||
]
|
||||
},
|
||||
"node_modules/@lancedb/vectordb-linux-x64-gnu": {
|
||||
"version": "0.3.2",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.3.2.tgz",
|
||||
"integrity": "sha512-S1D0VwdidwyfIKE58t94rD+EEb5B64ORMVkTw5FBZJirShkk82+0G9H3jNgWrRMt1PB3Qn1286/wqDLQ9+fTsA==",
|
||||
"version": "0.3.3",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.3.3.tgz",
|
||||
"integrity": "sha512-OrNlsKi/QPw59Po040oRKn8IuqFEk4upc/4FaFKqVkcmQjjZrMg5Kgy9ZfWIhHdAnWXXggZZIPArpt0X1B0ceA==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
@@ -365,9 +365,9 @@
|
||||
]
|
||||
},
|
||||
"node_modules/@lancedb/vectordb-win32-x64-msvc": {
|
||||
"version": "0.3.2",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.3.2.tgz",
|
||||
"integrity": "sha512-tnct1hf9GAlMchhYU6Lqmbm2nUKPO8apS7tuTIiucQh6gx+vbHmFZHFNHhw1AUJTpsj/eH2Z9iNayuC5Scdvhw==",
|
||||
"version": "0.3.3",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.3.3.tgz",
|
||||
"integrity": "sha512-lIT0A7a6eqX51IfGyhECtpXXgsr//kgbd+HZbcCdPy2GMmNezSch/7V22zExDSpF32hX8WfgcTLYCVWVilggDQ==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
@@ -4869,33 +4869,33 @@
|
||||
}
|
||||
},
|
||||
"@lancedb/vectordb-darwin-arm64": {
|
||||
"version": "0.3.2",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.3.2.tgz",
|
||||
"integrity": "sha512-CDh+sU2k4xVfWauwDZnybma8AJ+Q2i0SzHg05BwgDcani7I0k60NjJ5GobpgQ38xOiEmwHllES1xs4NRh+1YkA==",
|
||||
"version": "0.3.3",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.3.3.tgz",
|
||||
"integrity": "sha512-nvyj7xNX2/wb/PH5TjyhLR/NQ1jVuoBw2B5UaSg7qf8Tnm5SSXWQ7F25RVKcKwh72fz1qB+CWW24ftZnRzbT/Q==",
|
||||
"optional": true
|
||||
},
|
||||
"@lancedb/vectordb-darwin-x64": {
|
||||
"version": "0.3.2",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.3.2.tgz",
|
||||
"integrity": "sha512-xevyA+M/UE8ttaNkx68AyIUKlyWMhIzOECx0hbyN1zfShJe2UcunQcmbM1NxUi7EywodByyiP7bfMI1ZR1Y4Mw==",
|
||||
"version": "0.3.3",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.3.3.tgz",
|
||||
"integrity": "sha512-7CW+nILyPHp6cua0Rl0xaTDWw/vajEn/jCsEjFYgDmE+rtf5Z5Fum41FxR9C2TtIAvUK+nWb5mkYeOLqU6vRvg==",
|
||||
"optional": true
|
||||
},
|
||||
"@lancedb/vectordb-linux-arm64-gnu": {
|
||||
"version": "0.3.2",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.3.2.tgz",
|
||||
"integrity": "sha512-mSKkQ/p6UTSLwWzfZMBS7wA6Gf335KljXLaOhdT4TUI/jC6e9/cvZKkXRgpdE9/gvfl4/WVzKY7sg3+azDYQ+A==",
|
||||
"version": "0.3.3",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.3.3.tgz",
|
||||
"integrity": "sha512-MmhwbacKxZPkLwwOqysVY8mUb8lFoyFIPlYhSLV4xS1C8X4HWALljIul1qMl1RYudp9Uc3PsOzRexl+OvCGfUw==",
|
||||
"optional": true
|
||||
},
|
||||
"@lancedb/vectordb-linux-x64-gnu": {
|
||||
"version": "0.3.2",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.3.2.tgz",
|
||||
"integrity": "sha512-S1D0VwdidwyfIKE58t94rD+EEb5B64ORMVkTw5FBZJirShkk82+0G9H3jNgWrRMt1PB3Qn1286/wqDLQ9+fTsA==",
|
||||
"version": "0.3.3",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.3.3.tgz",
|
||||
"integrity": "sha512-OrNlsKi/QPw59Po040oRKn8IuqFEk4upc/4FaFKqVkcmQjjZrMg5Kgy9ZfWIhHdAnWXXggZZIPArpt0X1B0ceA==",
|
||||
"optional": true
|
||||
},
|
||||
"@lancedb/vectordb-win32-x64-msvc": {
|
||||
"version": "0.3.2",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.3.2.tgz",
|
||||
"integrity": "sha512-tnct1hf9GAlMchhYU6Lqmbm2nUKPO8apS7tuTIiucQh6gx+vbHmFZHFNHhw1AUJTpsj/eH2Z9iNayuC5Scdvhw==",
|
||||
"version": "0.3.3",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.3.3.tgz",
|
||||
"integrity": "sha512-lIT0A7a6eqX51IfGyhECtpXXgsr//kgbd+HZbcCdPy2GMmNezSch/7V22zExDSpF32hX8WfgcTLYCVWVilggDQ==",
|
||||
"optional": true
|
||||
},
|
||||
"@neon-rs/cli": {
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "vectordb",
|
||||
"version": "0.3.3",
|
||||
"version": "0.3.4",
|
||||
"description": " Serverless, low-latency vector database for AI applications",
|
||||
"main": "dist/index.js",
|
||||
"types": "dist/index.d.ts",
|
||||
@@ -81,10 +81,10 @@
|
||||
}
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"@lancedb/vectordb-darwin-arm64": "0.3.3",
|
||||
"@lancedb/vectordb-darwin-x64": "0.3.3",
|
||||
"@lancedb/vectordb-linux-arm64-gnu": "0.3.3",
|
||||
"@lancedb/vectordb-linux-x64-gnu": "0.3.3",
|
||||
"@lancedb/vectordb-win32-x64-msvc": "0.3.3"
|
||||
"@lancedb/vectordb-darwin-arm64": "0.3.4",
|
||||
"@lancedb/vectordb-darwin-x64": "0.3.4",
|
||||
"@lancedb/vectordb-linux-arm64-gnu": "0.3.4",
|
||||
"@lancedb/vectordb-linux-x64-gnu": "0.3.4",
|
||||
"@lancedb/vectordb-win32-x64-msvc": "0.3.4"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -23,7 +23,7 @@ import { Query } from './query'
|
||||
import { isEmbeddingFunction } from './embedding/embedding_function'
|
||||
|
||||
// eslint-disable-next-line @typescript-eslint/no-var-requires
|
||||
const { databaseNew, databaseTableNames, databaseOpenTable, databaseDropTable, tableCreate, tableAdd, tableCreateVectorIndex, tableCountRows, tableDelete, tableCleanupOldVersions, tableCompactFiles } = require('../native.js')
|
||||
const { databaseNew, databaseTableNames, databaseOpenTable, databaseDropTable, tableCreate, tableAdd, tableCreateVectorIndex, tableCountRows, tableDelete, tableCleanupOldVersions, tableCompactFiles, tableListIndices, tableIndexStats } = require('../native.js')
|
||||
|
||||
export { Query }
|
||||
export type { EmbeddingFunction }
|
||||
@@ -260,6 +260,27 @@ export interface Table<T = number[]> {
|
||||
* ```
|
||||
*/
|
||||
delete: (filter: string) => Promise<void>
|
||||
|
||||
/**
|
||||
* List the indices on this table.
|
||||
*/
|
||||
listIndices: () => Promise<VectorIndex[]>
|
||||
|
||||
/**
|
||||
* Get statistics about an index.
|
||||
*/
|
||||
indexStats: (indexUuid: string) => Promise<IndexStats>
|
||||
}
|
||||
|
||||
export interface VectorIndex {
|
||||
columns: string[]
|
||||
name: string
|
||||
uuid: string
|
||||
}
|
||||
|
||||
export interface IndexStats {
|
||||
numIndexedRows: number | null
|
||||
numUnindexedRows: number | null
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -502,6 +523,14 @@ export class LocalTable<T = number[]> implements Table<T> {
|
||||
return res.metrics
|
||||
})
|
||||
}
|
||||
|
||||
async listIndices (): Promise<VectorIndex[]> {
|
||||
return tableListIndices.call(this._tbl)
|
||||
}
|
||||
|
||||
async indexStats (indexUuid: string): Promise<IndexStats> {
|
||||
return tableIndexStats.call(this._tbl, indexUuid)
|
||||
}
|
||||
}
|
||||
|
||||
export interface CleanupStats {
|
||||
|
||||
@@ -65,8 +65,8 @@ describe('LanceDB Mirrored Store Integration test', function () {
|
||||
const mirroredPath = path.join(dir, `${tableName}.lance`)
|
||||
fs.readdir(mirroredPath, { withFileTypes: true }, (err, files) => {
|
||||
if (err != null) throw err
|
||||
// there should be two dirs
|
||||
assert.equal(files.length, 2)
|
||||
// there should be three dirs
|
||||
assert.equal(files.length, 3)
|
||||
assert.isTrue(files[0].isDirectory())
|
||||
assert.isTrue(files[1].isDirectory())
|
||||
|
||||
@@ -76,6 +76,12 @@ describe('LanceDB Mirrored Store Integration test', function () {
|
||||
assert.isTrue(files[0].name.endsWith('.txn'))
|
||||
})
|
||||
|
||||
fs.readdir(path.join(mirroredPath, '_versions'), { withFileTypes: true }, (err, files) => {
|
||||
if (err != null) throw err
|
||||
assert.equal(files.length, 1)
|
||||
assert.isTrue(files[0].name.endsWith('.manifest'))
|
||||
})
|
||||
|
||||
fs.readdir(path.join(mirroredPath, 'data'), { withFileTypes: true }, (err, files) => {
|
||||
if (err != null) throw err
|
||||
assert.equal(files.length, 1)
|
||||
@@ -88,8 +94,8 @@ describe('LanceDB Mirrored Store Integration test', function () {
|
||||
|
||||
fs.readdir(mirroredPath, { withFileTypes: true }, (err, files) => {
|
||||
if (err != null) throw err
|
||||
// there should be two dirs
|
||||
assert.equal(files.length, 3)
|
||||
// there should be four dirs
|
||||
assert.equal(files.length, 4)
|
||||
assert.isTrue(files[0].isDirectory())
|
||||
assert.isTrue(files[1].isDirectory())
|
||||
assert.isTrue(files[2].isDirectory())
|
||||
@@ -128,12 +134,13 @@ describe('LanceDB Mirrored Store Integration test', function () {
|
||||
|
||||
fs.readdir(mirroredPath, { withFileTypes: true }, (err, files) => {
|
||||
if (err != null) throw err
|
||||
// there should be two dirs
|
||||
assert.equal(files.length, 4)
|
||||
// there should be five dirs
|
||||
assert.equal(files.length, 5)
|
||||
assert.isTrue(files[0].isDirectory())
|
||||
assert.isTrue(files[1].isDirectory())
|
||||
assert.isTrue(files[2].isDirectory())
|
||||
assert.isTrue(files[3].isDirectory())
|
||||
assert.isTrue(files[4].isDirectory())
|
||||
|
||||
// Three TXs now
|
||||
fs.readdir(path.join(mirroredPath, '_transactions'), { withFileTypes: true }, (err, files) => {
|
||||
|
||||
@@ -14,7 +14,9 @@
|
||||
|
||||
import {
|
||||
type EmbeddingFunction, type Table, type VectorIndexParams, type Connection,
|
||||
type ConnectionOptions, type CreateTableOptions, type WriteOptions
|
||||
type ConnectionOptions, type CreateTableOptions, type VectorIndex,
|
||||
type WriteOptions,
|
||||
type IndexStats
|
||||
} from '../index'
|
||||
import { Query } from '../query'
|
||||
|
||||
@@ -241,4 +243,21 @@ export class RemoteTable<T = number[]> implements Table<T> {
|
||||
async delete (filter: string): Promise<void> {
|
||||
await this._client.post(`/v1/table/${this._name}/delete/`, { predicate: filter })
|
||||
}
|
||||
|
||||
async listIndices (): Promise<VectorIndex[]> {
|
||||
const results = await this._client.post(`/v1/table/${this._name}/index/list/`)
|
||||
return results.data.indexes?.map((index: any) => ({
|
||||
columns: index.columns,
|
||||
name: index.index_name,
|
||||
uuid: index.index_uuid
|
||||
}))
|
||||
}
|
||||
|
||||
async indexStats (indexUuid: string): Promise<IndexStats> {
|
||||
const results = await this._client.post(`/v1/table/${this._name}/index/${indexUuid}/stats/`)
|
||||
return {
|
||||
numIndexedRows: results.data.num_indexed_rows,
|
||||
numUnindexedRows: results.data.num_unindexed_rows
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -328,6 +328,24 @@ describe('LanceDB client', function () {
|
||||
const createIndex = table.createIndex({ type: 'ivf_pq', column: 'name', num_partitions: -1, max_iters: 2, num_sub_vectors: 2 })
|
||||
await expect(createIndex).to.be.rejectedWith('num_partitions: must be > 0')
|
||||
})
|
||||
|
||||
it('should be able to list index and stats', async function () {
|
||||
const uri = await createTestDB(32, 300)
|
||||
const con = await lancedb.connect(uri)
|
||||
const table = await con.openTable('vectors')
|
||||
await table.createIndex({ type: 'ivf_pq', column: 'vector', num_partitions: 2, max_iters: 2, num_sub_vectors: 2 })
|
||||
|
||||
const indices = await table.listIndices()
|
||||
expect(indices).to.have.lengthOf(1)
|
||||
expect(indices[0].name).to.equal('vector_idx')
|
||||
expect(indices[0].uuid).to.not.be.equal(undefined)
|
||||
expect(indices[0].columns).to.have.lengthOf(1)
|
||||
expect(indices[0].columns[0]).to.equal('vector')
|
||||
|
||||
const stats = await table.indexStats(indices[0].uuid)
|
||||
expect(stats.numIndexedRows).to.equal(300)
|
||||
expect(stats.numUnindexedRows).to.equal(0)
|
||||
}).timeout(50_000)
|
||||
})
|
||||
|
||||
describe('when using a custom embedding function', function () {
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
[bumpversion]
|
||||
current_version = 0.3.1
|
||||
current_version = 0.3.2
|
||||
commit = True
|
||||
message = [python] Bump version: {current_version} → {new_version}
|
||||
tag = True
|
||||
|
||||
@@ -327,7 +327,12 @@ class LanceModel(pydantic.BaseModel):
|
||||
for vec, func in vec_and_function:
|
||||
for source, field_info in cls.safe_get_fields().items():
|
||||
src_func = get_extras(field_info, "source_column_for")
|
||||
if src_func == func:
|
||||
if src_func is func:
|
||||
# note we can't use == here since the function is a pydantic
|
||||
# model so two instances of the same function are ==, so if you
|
||||
# have multiple vector columns from multiple sources, both will
|
||||
# be mapped to the same source column
|
||||
# GH594
|
||||
configs.append(
|
||||
EmbeddingFunctionConfig(
|
||||
source_column=source, vector_column=vec, function=func
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
[project]
|
||||
name = "lancedb"
|
||||
version = "0.3.1"
|
||||
version = "0.3.2"
|
||||
dependencies = [
|
||||
"deprecation",
|
||||
"pylance==0.8.6",
|
||||
"pylance==0.8.7",
|
||||
"ratelimiter~=1.0",
|
||||
"retry>=0.9.2",
|
||||
"tqdm>=4.1.0",
|
||||
|
||||
@@ -33,10 +33,13 @@ def test_sentence_transformer(alias, tmp_path):
|
||||
db = lancedb.connect(tmp_path)
|
||||
registry = get_registry()
|
||||
func = registry.get(alias).create()
|
||||
func2 = registry.get(alias).create()
|
||||
|
||||
class Words(LanceModel):
|
||||
text: str = func.SourceField()
|
||||
text2: str = func2.SourceField()
|
||||
vector: Vector(func.ndims()) = func.VectorField()
|
||||
vector2: Vector(func2.ndims()) = func2.VectorField()
|
||||
|
||||
table = db.create_table("words", schema=Words)
|
||||
table.add(
|
||||
@@ -50,7 +53,16 @@ def test_sentence_transformer(alias, tmp_path):
|
||||
"foo",
|
||||
"bar",
|
||||
"baz",
|
||||
]
|
||||
],
|
||||
"text2": [
|
||||
"to be or not to be",
|
||||
"that is the question",
|
||||
"for whether tis nobler",
|
||||
"in the mind to suffer",
|
||||
"the slings and arrows",
|
||||
"of outrageous fortune",
|
||||
"or to take arms",
|
||||
],
|
||||
}
|
||||
)
|
||||
)
|
||||
@@ -62,6 +74,13 @@ def test_sentence_transformer(alias, tmp_path):
|
||||
expected = table.search(vec).limit(1).to_pydantic(Words)[0]
|
||||
assert actual.text == expected.text
|
||||
assert actual.text == "hello world"
|
||||
assert not np.allclose(actual.vector, actual.vector2)
|
||||
|
||||
actual = (
|
||||
table.search(query, vector_column_name="vector2").limit(1).to_pydantic(Words)[0]
|
||||
)
|
||||
assert actual.text != "hello world"
|
||||
assert not np.allclose(actual.vector, actual.vector2)
|
||||
|
||||
|
||||
@pytest.mark.slow
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "vectordb-node"
|
||||
version = "0.3.3"
|
||||
version = "0.3.4"
|
||||
description = "Serverless, low-latency vector database for AI applications"
|
||||
license = "Apache-2.0"
|
||||
edition = "2018"
|
||||
@@ -14,7 +14,7 @@ arrow-array = { workspace = true }
|
||||
arrow-ipc = { workspace = true }
|
||||
arrow-schema = { workspace = true }
|
||||
chrono = { workspace = true }
|
||||
conv = "0.3.3"
|
||||
conv = "0.3.4"
|
||||
once_cell = "1"
|
||||
futures = "0.3"
|
||||
half = { workspace = true }
|
||||
|
||||
@@ -239,6 +239,8 @@ fn main(mut cx: ModuleContext) -> NeonResult<()> {
|
||||
cx.export_function("tableDelete", JsTable::js_delete)?;
|
||||
cx.export_function("tableCleanupOldVersions", JsTable::js_cleanup)?;
|
||||
cx.export_function("tableCompactFiles", JsTable::js_compact)?;
|
||||
cx.export_function("tableListIndices", JsTable::js_list_indices)?;
|
||||
cx.export_function("tableIndexStats", JsTable::js_index_stats)?;
|
||||
cx.export_function(
|
||||
"tableCreateVectorIndex",
|
||||
index::vector::table_create_vector_index,
|
||||
|
||||
@@ -247,7 +247,7 @@ impl JsTable {
|
||||
}
|
||||
|
||||
rt.spawn(async move {
|
||||
let stats = table.compact_files(options).await;
|
||||
let stats = table.compact_files(options, None).await;
|
||||
|
||||
deferred.settle_with(&channel, move |mut cx| {
|
||||
let stats = stats.or_throw(&mut cx)?;
|
||||
@@ -276,4 +276,91 @@ impl JsTable {
|
||||
});
|
||||
Ok(promise)
|
||||
}
|
||||
|
||||
pub(crate) fn js_list_indices(mut cx: FunctionContext) -> JsResult<JsPromise> {
|
||||
let js_table = cx.this().downcast_or_throw::<JsBox<JsTable>, _>(&mut cx)?;
|
||||
let rt = runtime(&mut cx)?;
|
||||
let (deferred, promise) = cx.promise();
|
||||
// let predicate = cx.argument::<JsString>(0)?.value(&mut cx);
|
||||
let channel = cx.channel();
|
||||
let table = js_table.table.clone();
|
||||
|
||||
rt.spawn(async move {
|
||||
let indices = table.load_indices().await;
|
||||
|
||||
deferred.settle_with(&channel, move |mut cx| {
|
||||
let indices = indices.or_throw(&mut cx)?;
|
||||
|
||||
let output = JsArray::new(&mut cx, indices.len() as u32);
|
||||
for (i, index) in indices.iter().enumerate() {
|
||||
let js_index = JsObject::new(&mut cx);
|
||||
let index_name = cx.string(index.index_name.clone());
|
||||
js_index.set(&mut cx, "name", index_name)?;
|
||||
|
||||
let index_uuid = cx.string(index.index_uuid.clone());
|
||||
js_index.set(&mut cx, "uuid", index_uuid)?;
|
||||
|
||||
let js_index_columns = JsArray::new(&mut cx, index.columns.len() as u32);
|
||||
for (j, column) in index.columns.iter().enumerate() {
|
||||
let js_column = cx.string(column.clone());
|
||||
js_index_columns.set(&mut cx, j as u32, js_column)?;
|
||||
}
|
||||
js_index.set(&mut cx, "columns", js_index_columns)?;
|
||||
|
||||
output.set(&mut cx, i as u32, js_index)?;
|
||||
}
|
||||
|
||||
Ok(output)
|
||||
})
|
||||
});
|
||||
Ok(promise)
|
||||
}
|
||||
|
||||
pub(crate) fn js_index_stats(mut cx: FunctionContext) -> JsResult<JsPromise> {
|
||||
let js_table = cx.this().downcast_or_throw::<JsBox<JsTable>, _>(&mut cx)?;
|
||||
let rt = runtime(&mut cx)?;
|
||||
let (deferred, promise) = cx.promise();
|
||||
let index_uuid = cx.argument::<JsString>(0)?.value(&mut cx);
|
||||
let channel = cx.channel();
|
||||
let table = js_table.table.clone();
|
||||
|
||||
rt.spawn(async move {
|
||||
let load_stats = futures::try_join!(
|
||||
table.count_indexed_rows(&index_uuid),
|
||||
table.count_unindexed_rows(&index_uuid)
|
||||
);
|
||||
|
||||
deferred.settle_with(&channel, move |mut cx| {
|
||||
let (indexed_rows, unindexed_rows) = load_stats.or_throw(&mut cx)?;
|
||||
|
||||
let output = JsObject::new(&mut cx);
|
||||
|
||||
match indexed_rows {
|
||||
Some(x) => {
|
||||
let i = cx.number(x as f64);
|
||||
output.set(&mut cx, "numIndexedRows", i)?;
|
||||
}
|
||||
None => {
|
||||
let null = cx.null();
|
||||
output.set(&mut cx, "numIndexedRows", null)?;
|
||||
}
|
||||
};
|
||||
|
||||
match unindexed_rows {
|
||||
Some(x) => {
|
||||
let i = cx.number(x as f64);
|
||||
output.set(&mut cx, "numUnindexedRows", i)?;
|
||||
}
|
||||
None => {
|
||||
let null = cx.null();
|
||||
output.set(&mut cx, "numUnindexedRows", null)?;
|
||||
}
|
||||
};
|
||||
|
||||
Ok(output)
|
||||
})
|
||||
});
|
||||
|
||||
Ok(promise)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "vectordb"
|
||||
version = "0.3.3"
|
||||
version = "0.3.4"
|
||||
edition = "2021"
|
||||
description = "LanceDB: A serverless, low-latency vector database for AI applications"
|
||||
license = "Apache-2.0"
|
||||
|
||||
@@ -12,6 +12,7 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use lance::format::{Index, Manifest};
|
||||
use lance::index::vector::ivf::IvfBuildParams;
|
||||
use lance::index::vector::pq::PQBuildParams;
|
||||
use lance::index::vector::VectorIndexParams;
|
||||
@@ -106,6 +107,27 @@ impl VectorIndexBuilder for IvfPQIndexBuilder {
|
||||
}
|
||||
}
|
||||
|
||||
pub struct VectorIndex {
|
||||
pub columns: Vec<String>,
|
||||
pub index_name: String,
|
||||
pub index_uuid: String,
|
||||
}
|
||||
|
||||
impl VectorIndex {
|
||||
pub fn new_from_format(manifest: &Manifest, index: &Index) -> VectorIndex {
|
||||
let fields = index
|
||||
.fields
|
||||
.iter()
|
||||
.map(|i| manifest.schema.fields[*i as usize].name.clone())
|
||||
.collect();
|
||||
VectorIndex {
|
||||
columns: fields,
|
||||
index_name: index.name.clone(),
|
||||
index_uuid: index.uuid.to_string(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
@@ -57,7 +57,7 @@ trait PrimaryOnly {
|
||||
|
||||
impl PrimaryOnly for Path {
|
||||
fn primary_only(&self) -> bool {
|
||||
self.to_string().contains("manifest")
|
||||
self.filename().unwrap_or("") == "_latest.manifest"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -118,8 +118,10 @@ impl ObjectStore for MirroringObjectStore {
|
||||
self.primary.head(location).await
|
||||
}
|
||||
|
||||
// garbage collection on secondary will happen async from other means
|
||||
async fn delete(&self, location: &Path) -> Result<()> {
|
||||
if !location.primary_only() {
|
||||
self.secondary.delete(location).await?;
|
||||
}
|
||||
self.primary.delete(location).await
|
||||
}
|
||||
|
||||
@@ -132,7 +134,7 @@ impl ObjectStore for MirroringObjectStore {
|
||||
}
|
||||
|
||||
async fn copy(&self, from: &Path, to: &Path) -> Result<()> {
|
||||
if from.primary_only() {
|
||||
if to.primary_only() {
|
||||
self.primary.copy(from, to).await
|
||||
} else {
|
||||
self.secondary.copy(from, to).await?;
|
||||
@@ -142,6 +144,9 @@ impl ObjectStore for MirroringObjectStore {
|
||||
}
|
||||
|
||||
async fn copy_if_not_exists(&self, from: &Path, to: &Path) -> Result<()> {
|
||||
if !to.primary_only() {
|
||||
self.secondary.copy(from, to).await?;
|
||||
}
|
||||
self.primary.copy_if_not_exists(from, to).await
|
||||
}
|
||||
}
|
||||
@@ -379,7 +384,7 @@ mod test {
|
||||
let primary_f = primary_elem.unwrap().unwrap();
|
||||
// hit manifest, skip, _versions contains all the manifest and should not exist on secondary
|
||||
let primary_raw_path = primary_f.file_name().to_str().unwrap();
|
||||
if primary_raw_path.contains("manifest") || primary_raw_path.contains("_versions") {
|
||||
if primary_raw_path.contains("_latest.manifest") {
|
||||
primary_elem = primary_iter.next();
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -18,14 +18,16 @@ use std::sync::Arc;
|
||||
use arrow_array::{Float32Array, RecordBatchReader};
|
||||
use arrow_schema::SchemaRef;
|
||||
use lance::dataset::cleanup::RemovalStats;
|
||||
use lance::dataset::optimize::{compact_files, CompactionMetrics, CompactionOptions};
|
||||
use lance::dataset::optimize::{
|
||||
compact_files, CompactionMetrics, CompactionOptions, IndexRemapperOptions,
|
||||
};
|
||||
use lance::dataset::{Dataset, WriteParams};
|
||||
use lance::index::IndexType;
|
||||
use lance::index::{DatasetIndexExt, IndexType};
|
||||
use lance::io::object_store::WrappingObjectStore;
|
||||
use std::path::Path;
|
||||
|
||||
use crate::error::{Error, Result};
|
||||
use crate::index::vector::VectorIndexBuilder;
|
||||
use crate::index::vector::{VectorIndexBuilder, VectorIndex};
|
||||
use crate::query::Query;
|
||||
use crate::utils::{PatchReadParam, PatchWriteParam};
|
||||
use crate::WriteMode;
|
||||
@@ -153,6 +155,22 @@ impl Table {
|
||||
})
|
||||
}
|
||||
|
||||
pub async fn checkout_latest(&self) -> Result<Self> {
|
||||
let latest_version_id = self.dataset.latest_version_id().await?;
|
||||
let dataset = if latest_version_id == self.dataset.version().version {
|
||||
self.dataset.clone()
|
||||
} else {
|
||||
Arc::new(self.dataset.checkout_version(latest_version_id).await?)
|
||||
};
|
||||
|
||||
Ok(Table {
|
||||
name: self.name.clone(),
|
||||
uri: self.uri.clone(),
|
||||
dataset,
|
||||
store_wrapper: self.store_wrapper.clone(),
|
||||
})
|
||||
}
|
||||
|
||||
fn get_table_name(uri: &str) -> Result<String> {
|
||||
let path = Path::new(uri);
|
||||
let name = path
|
||||
@@ -222,8 +240,6 @@ impl Table {
|
||||
|
||||
/// Create index on the table.
|
||||
pub async fn create_index(&mut self, index_builder: &impl VectorIndexBuilder) -> Result<()> {
|
||||
use lance::index::DatasetIndexExt;
|
||||
|
||||
let mut dataset = self.dataset.as_ref().clone();
|
||||
dataset
|
||||
.create_index(
|
||||
@@ -241,6 +257,14 @@ impl Table {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn optimize_indices(&mut self) -> Result<()> {
|
||||
let mut dataset = self.dataset.as_ref().clone();
|
||||
|
||||
dataset.optimize_indices().await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Insert records into this Table
|
||||
///
|
||||
/// # Arguments
|
||||
@@ -337,12 +361,44 @@ impl Table {
|
||||
/// for faster reads.
|
||||
///
|
||||
/// This calls into [lance::dataset::optimize::compact_files].
|
||||
pub async fn compact_files(&mut self, options: CompactionOptions) -> Result<CompactionMetrics> {
|
||||
pub async fn compact_files(
|
||||
&mut self,
|
||||
options: CompactionOptions,
|
||||
remap_options: Option<Arc<dyn IndexRemapperOptions>>,
|
||||
) -> Result<CompactionMetrics> {
|
||||
let mut dataset = self.dataset.as_ref().clone();
|
||||
let metrics = compact_files(&mut dataset, options, None).await?;
|
||||
let metrics = compact_files(&mut dataset, options, remap_options).await?;
|
||||
self.dataset = Arc::new(dataset);
|
||||
Ok(metrics)
|
||||
}
|
||||
|
||||
pub fn count_fragments(&self) -> usize {
|
||||
self.dataset.count_fragments()
|
||||
}
|
||||
|
||||
pub fn count_deleted_rows(&self) -> usize {
|
||||
self.dataset.count_deleted_rows()
|
||||
}
|
||||
|
||||
pub fn num_small_files(&self, max_rows_per_group: usize) -> usize {
|
||||
self.dataset.num_small_files(max_rows_per_group)
|
||||
}
|
||||
|
||||
pub async fn count_indexed_rows(&self, index_uuid: &str) -> Result<Option<usize>> {
|
||||
Ok(self.dataset.count_indexed_rows(index_uuid).await?)
|
||||
}
|
||||
|
||||
pub async fn count_unindexed_rows(&self, index_uuid: &str) -> Result<Option<usize>> {
|
||||
Ok(self.dataset.count_unindexed_rows(index_uuid).await?)
|
||||
}
|
||||
|
||||
pub async fn load_indices(&self) -> Result<Vec<VectorIndex>> {
|
||||
let (indices, mf) = futures::try_join!(
|
||||
self.dataset.load_indices(),
|
||||
self.dataset.latest_manifest()
|
||||
)?;
|
||||
Ok(indices.iter().map(|i| VectorIndex::new_from_format(&mf, i)).collect())
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
||||
Reference in New Issue
Block a user