Compare commits

..

1 Commits

Author SHA1 Message Date
qzhu
3965d1584c prog 1 2024-04-24 10:45:47 -07:00
34 changed files with 251 additions and 342 deletions

View File

@@ -1,5 +1,5 @@
[bumpversion]
current_version = 0.4.18
current_version = 0.4.17
commit = True
message = Bump version: {current_version} → {new_version}
tag = True

View File

@@ -14,22 +14,22 @@ keywords = ["lancedb", "lance", "database", "vector", "search"]
categories = ["database-implementations"]
[workspace.dependencies]
lance = { "version" = "=0.10.18", "features" = ["dynamodb"] }
lance-index = { "version" = "=0.10.18" }
lance-linalg = { "version" = "=0.10.18" }
lance-testing = { "version" = "=0.10.18" }
lance = { "version" = "=0.10.15", "features" = ["dynamodb"] }
lance-index = { "version" = "=0.10.15" }
lance-linalg = { "version" = "=0.10.15" }
lance-testing = { "version" = "=0.10.15" }
# Note that this one does not include pyarrow
arrow = { version = "51.0", optional = false }
arrow-array = "51.0"
arrow-data = "51.0"
arrow-ipc = "51.0"
arrow-ord = "51.0"
arrow-schema = "51.0"
arrow-arith = "51.0"
arrow-cast = "51.0"
arrow = { version = "50.0", optional = false }
arrow-array = "50.0"
arrow-data = "50.0"
arrow-ipc = "50.0"
arrow-ord = "50.0"
arrow-schema = "50.0"
arrow-arith = "50.0"
arrow-cast = "50.0"
async-trait = "0"
chrono = "0.4.35"
half = { "version" = "2.4.1", default-features = false, features = [
half = { "version" = "=2.3.1", default-features = false, features = [
"num-traits",
] }
futures = "0"

View File

@@ -159,7 +159,7 @@ Allows you to set parameters when registering a `sentence-transformers` object.
from lancedb.embeddings import get_registry
db = lancedb.connect("/tmp/db")
model = get_registry().get("sentence-transformers").create(name="BAAI/bge-small-en-v1.5", device="cpu")
model = get_registry.get("sentence-transformers").create(name="BAAI/bge-small-en-v1.5", device="cpu")
class Words(LanceModel):
text: str = model.SourceField()

View File

@@ -46,7 +46,7 @@ For this purpose, LanceDB introduces an **embedding functions API**, that allow
```python
class Pets(LanceModel):
vector: Vector(clip.ndims()) = clip.VectorField()
vector: Vector(clip.ndims) = clip.VectorField()
image_uri: str = clip.SourceField()
```
@@ -149,7 +149,7 @@ You can also use the integration for adding utility operations in the schema. Fo
```python
class Pets(LanceModel):
vector: Vector(clip.ndims()) = clip.VectorField()
vector: Vector(clip.ndims) = clip.VectorField()
image_uri: str = clip.SourceField()
@property
@@ -166,4 +166,4 @@ rs[2].image
![](../assets/dog_clip_output.png)
Now that you have the basic idea about LanceDB embedding functions and the embedding function registry,
let's dive deeper into defining your own [custom functions](./custom_embedding_function.md).
let's dive deeper into defining your own [custom functions](./custom_embedding_function.md).

74
node/package-lock.json generated
View File

@@ -1,12 +1,12 @@
{
"name": "vectordb",
"version": "0.4.18",
"version": "0.4.17",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "vectordb",
"version": "0.4.18",
"version": "0.4.17",
"cpu": [
"x64",
"arm64"
@@ -52,11 +52,11 @@
"uuid": "^9.0.0"
},
"optionalDependencies": {
"@lancedb/vectordb-darwin-arm64": "0.4.18",
"@lancedb/vectordb-darwin-x64": "0.4.18",
"@lancedb/vectordb-linux-arm64-gnu": "0.4.18",
"@lancedb/vectordb-linux-x64-gnu": "0.4.18",
"@lancedb/vectordb-win32-x64-msvc": "0.4.18"
"@lancedb/vectordb-darwin-arm64": "0.4.17",
"@lancedb/vectordb-darwin-x64": "0.4.17",
"@lancedb/vectordb-linux-arm64-gnu": "0.4.17",
"@lancedb/vectordb-linux-x64-gnu": "0.4.17",
"@lancedb/vectordb-win32-x64-msvc": "0.4.17"
},
"peerDependencies": {
"@apache-arrow/ts": "^14.0.2",
@@ -333,66 +333,6 @@
"@jridgewell/sourcemap-codec": "^1.4.10"
}
},
"node_modules/@lancedb/vectordb-darwin-arm64": {
"version": "0.4.18",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.4.18.tgz",
"integrity": "sha512-CzJbkBKz30U0ocFFhRLV3ZPRZh3MtAkOmFr76jxRWeXLPM/JcLvhGOAnW9h/XdTONidHOfHNZnUtrjeWDMCyig==",
"cpu": [
"arm64"
],
"optional": true,
"os": [
"darwin"
]
},
"node_modules/@lancedb/vectordb-darwin-x64": {
"version": "0.4.18",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.4.18.tgz",
"integrity": "sha512-wyqpfdDBE5g+8SLN/6E/r37smt5i+4H3MVNZ2GZfvcMAd4xIZTwGAf5Mfx8j15t3mvKMiBEZPTvYQfFde2bQmA==",
"cpu": [
"x64"
],
"optional": true,
"os": [
"darwin"
]
},
"node_modules/@lancedb/vectordb-linux-arm64-gnu": {
"version": "0.4.18",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.4.18.tgz",
"integrity": "sha512-OfCjTrwfdmT9Qh5r92AUj7Wosvl8mSYADS6rp+ofNoht9nq1UqtlyrCot1RhuF5w6UW1aLCJXycXY0qHh1WUPw==",
"cpu": [
"arm64"
],
"optional": true,
"os": [
"linux"
]
},
"node_modules/@lancedb/vectordb-linux-x64-gnu": {
"version": "0.4.18",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.4.18.tgz",
"integrity": "sha512-uJiCminsZQ6oUvVYEElgN+/Lqd9646cJUWCbfiFSnt10PCj/kFBXWjKEuCxfG/A0bp6DTm5mU5RYWDfY9v3T0Q==",
"cpu": [
"x64"
],
"optional": true,
"os": [
"linux"
]
},
"node_modules/@lancedb/vectordb-win32-x64-msvc": {
"version": "0.4.18",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.4.18.tgz",
"integrity": "sha512-T9//HlvtNDHEfbIjz0ExLQFbqKFHRdaKf3jpFECt5oJdU0VCXe5460DIMvp6w/SDf24pb1UvOjZnmPLvX4yPNg==",
"cpu": [
"x64"
],
"optional": true,
"os": [
"win32"
]
},
"node_modules/@neon-rs/cli": {
"version": "0.0.160",
"resolved": "https://registry.npmjs.org/@neon-rs/cli/-/cli-0.0.160.tgz",

View File

@@ -1,6 +1,6 @@
{
"name": "vectordb",
"version": "0.4.18",
"version": "0.4.17",
"description": " Serverless, low-latency vector database for AI applications",
"main": "dist/index.js",
"types": "dist/index.d.ts",
@@ -88,10 +88,10 @@
}
},
"optionalDependencies": {
"@lancedb/vectordb-darwin-arm64": "0.4.18",
"@lancedb/vectordb-darwin-x64": "0.4.18",
"@lancedb/vectordb-linux-arm64-gnu": "0.4.18",
"@lancedb/vectordb-linux-x64-gnu": "0.4.18",
"@lancedb/vectordb-win32-x64-msvc": "0.4.18"
"@lancedb/vectordb-darwin-arm64": "0.4.17",
"@lancedb/vectordb-darwin-x64": "0.4.17",
"@lancedb/vectordb-linux-arm64-gnu": "0.4.17",
"@lancedb/vectordb-linux-x64-gnu": "0.4.17",
"@lancedb/vectordb-win32-x64-msvc": "0.4.17"
}
}

View File

@@ -140,9 +140,6 @@ export class RemoteConnection implements Connection {
schema = nameOrOpts.schema
embeddings = nameOrOpts.embeddingFunction
tableName = nameOrOpts.name
if (data === undefined) {
data = nameOrOpts.data
}
}
let buffer: Buffer

1
nodejs/.gitignore vendored
View File

@@ -1 +0,0 @@
yarn.lock

View File

@@ -77,18 +77,6 @@ export interface OpenTableOptions {
* The available options are described at https://lancedb.github.io/lancedb/guides/storage/
*/
storageOptions?: Record<string, string>;
/**
* Set the size of the index cache, specified as a number of entries
*
* The exact meaning of an "entry" will depend on the type of index:
* - IVF: there is one entry for each IVF partition
* - BTREE: there is one entry for the entire index
*
* This cache applies to the entire opened table, across all indices.
* Setting this value higher will increase performance on larger datasets
* at the expense of more RAM
*/
indexCacheSize?: number;
}
export interface TableNamesOptions {
@@ -172,7 +160,6 @@ export class Connection {
const innerTable = await this.inner.openTable(
name,
cleanseStorageOptions(options?.storageOptions),
options?.indexCacheSize,
);
return new Table(innerTable);
}

View File

@@ -169,20 +169,17 @@ export class Table {
* // If the column has a vector (fixed size list) data type then
* // an IvfPq vector index will be created.
* const table = await conn.openTable("my_table");
* await table.createIndex("vector");
* await table.createIndex(["vector"]);
* @example
* // For advanced control over vector index creation you can specify
* // the index type and options.
* const table = await conn.openTable("my_table");
* await table.createIndex("vector", {
* config: lancedb.Index.ivfPq({
* numPartitions: 128,
* numSubVectors: 16,
* }),
* });
* await table.createIndex(["vector"], I)
* .ivf_pq({ num_partitions: 128, num_sub_vectors: 16 })
* .build();
* @example
* // Or create a Scalar index
* await table.createIndex("my_float_col");
* await table.createIndex("my_float_col").build();
*/
async createIndex(column: string, options?: Partial<IndexOptions>) {
// Bit of a hack to get around the fact that TS has no package-scope.
@@ -200,7 +197,8 @@ export class Table {
* vector similarity, sorting, and more.
*
* Note: By default, all columns are returned. For best performance, you should
* only fetch the columns you need.
* only fetch the columns you need. See [`Query::select_with_projection`] for
* more details.
*
* When appropriate, various indices and statistics based pruning will be used to
* accelerate the query.
@@ -208,13 +206,10 @@ export class Table {
* // SQL-style filtering
* //
* // This query will return up to 1000 rows whose value in the `id` column
* // is greater than 5. LanceDb supports a broad set of filtering functions.
* for await (const batch of table
* .query()
* .where("id > 1")
* .select(["id"])
* .limit(20)) {
* console.log(batch);
* // is greater than 5. LanceDb supports a broad set of filtering functions.
* for await (const batch of table.query()
* .filter("id > 1").select(["id"]).limit(20)) {
* console.log(batch);
* }
* @example
* // Vector Similarity Search
@@ -223,14 +218,13 @@ export class Table {
* // closest to the query vector [1.0, 2.0, 3.0]. If an index has been created
* // on the "vector" column then this will perform an ANN search.
* //
* // The `refineFactor` and `nprobes` methods are used to control the recall /
* // The `refine_factor` and `nprobes` methods are used to control the recall /
* // latency tradeoff of the search.
* for await (const batch of table
* .query()
* .where("id > 1")
* .select(["id"])
* .limit(20)) {
* console.log(batch);
* for await (const batch of table.query()
* .nearestTo([1, 2, 3])
* .refineFactor(5).nprobe(10)
* .limit(10)) {
* console.log(batch);
* }
* @example
* // Scan the full dataset
@@ -292,45 +286,43 @@ export class Table {
await this.inner.dropColumns(columnNames);
}
/** Retrieve the version of the table */
/**
* Retrieve the version of the table
*
* LanceDb supports versioning. Every operation that modifies the table increases
* version. As long as a version hasn't been deleted you can `[Self::checkout]` that
* version to view the data at that point. In addition, you can `[Self::restore]` the
* version to replace the current table with a previous version.
*/
async version(): Promise<number> {
return await this.inner.version();
}
/**
* Checks out a specific version of the table _This is an in-place operation._
* Checks out a specific version of the Table
*
* This allows viewing previous versions of the table. If you wish to
* keep writing to the dataset starting from an old version, then use
* the `restore` function.
* Any read operation on the table will now access the data at the checked out version.
* As a consequence, calling this method will disable any read consistency interval
* that was previously set.
*
* Calling this method will set the table into time-travel mode. If you
* wish to return to standard mode, call `checkoutLatest`.
* @param {number} version The version to checkout
* @example
* ```typescript
* import * as lancedb from "@lancedb/lancedb"
* const db = await lancedb.connect("./.lancedb");
* const table = await db.createTable("my_table", [
* { vector: [1.1, 0.9], type: "vector" },
* ]);
* This is a read-only operation that turns the table into a sort of "view"
* or "detached head". Other table instances will not be affected. To make the change
* permanent you can use the `[Self::restore]` method.
*
* console.log(await table.version()); // 1
* console.log(table.display());
* await table.add([{ vector: [0.5, 0.2], type: "vector" }]);
* await table.checkout(1);
* console.log(await table.version()); // 2
* ```
* Any operation that modifies the table will fail while the table is in a checked
* out state.
*
* To return the table to a normal state use `[Self::checkout_latest]`
*/
async checkout(version: number): Promise<void> {
await this.inner.checkout(version);
}
/**
* Checkout the latest version of the table. _This is an in-place operation._
* Ensures the table is pointing at the latest version
*
* The table will be set back into standard mode, and will track the latest
* version of the table.
* This can be used to manually update a table when the read_consistency_interval is None
* It can also be used to undo a `[Self::checkout]` operation
*/
async checkoutLatest(): Promise<void> {
await this.inner.checkoutLatest();
@@ -352,7 +344,9 @@ export class Table {
await this.inner.restore();
}
/** List all indices that have been created with {@link Table.createIndex} */
/**
* List all indices that have been created with Self::create_index
*/
async listIndices(): Promise<IndexConfig[]> {
return await this.inner.listIndices();
}

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-darwin-arm64",
"version": "0.4.18",
"version": "0.4.17",
"os": [
"darwin"
],

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-darwin-x64",
"version": "0.4.18",
"version": "0.4.17",
"os": [
"darwin"
],

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-arm64-gnu",
"version": "0.4.18",
"version": "0.4.17",
"os": [
"linux"
],

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-x64-gnu",
"version": "0.4.18",
"version": "0.4.17",
"os": [
"linux"
],

View File

@@ -1,12 +1,12 @@
{
"name": "@lancedb/lancedb",
"version": "0.4.18",
"version": "0.4.16",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "@lancedb/lancedb",
"version": "0.4.18",
"version": "0.4.16",
"cpu": [
"x64",
"arm64"
@@ -45,6 +45,13 @@
},
"engines": {
"node": ">= 18"
},
"optionalDependencies": {
"@lancedb/lancedb-darwin-arm64": "0.4.16",
"@lancedb/lancedb-darwin-x64": "0.4.16",
"@lancedb/lancedb-linux-arm64-gnu": "0.4.16",
"@lancedb/lancedb-linux-x64-gnu": "0.4.16",
"@lancedb/lancedb-win32-x64-msvc": "0.4.16"
}
},
"node_modules/@75lb/deep-merge": {
@@ -2214,6 +2221,81 @@
"@jridgewell/sourcemap-codec": "^1.4.14"
}
},
"node_modules/@lancedb/lancedb-darwin-arm64": {
"version": "0.4.16",
"resolved": "https://registry.npmjs.org/@lancedb/lancedb-darwin-arm64/-/lancedb-darwin-arm64-0.4.16.tgz",
"integrity": "sha512-CV65ouIDQbBSNtdHbQSr2fqXflOuqud1cfweUS+EiK7eEOEYl7nO2oiFYO49Jy76MEwZxiP99hW825aCqIQJqg==",
"cpu": [
"arm64"
],
"optional": true,
"os": [
"darwin"
],
"engines": {
"node": ">= 18"
}
},
"node_modules/@lancedb/lancedb-darwin-x64": {
"version": "0.4.16",
"resolved": "https://registry.npmjs.org/@lancedb/lancedb-darwin-x64/-/lancedb-darwin-x64-0.4.16.tgz",
"integrity": "sha512-1CwIYCNdbFmV7fvqM+qUxbYgwxx0slcCV48PC/I19Ejitgtzw/NJiWDCvONhaLqG85lWNZm1xYceRpVv7b8seQ==",
"cpu": [
"x64"
],
"optional": true,
"os": [
"darwin"
],
"engines": {
"node": ">= 18"
}
},
"node_modules/@lancedb/lancedb-linux-arm64-gnu": {
"version": "0.4.16",
"resolved": "https://registry.npmjs.org/@lancedb/lancedb-linux-arm64-gnu/-/lancedb-linux-arm64-gnu-0.4.16.tgz",
"integrity": "sha512-CzLEbzoHKS6jV0k52YnvsiVNx0VzLp1Vz/zmbHI6HmB/XbS67qDO93Jk71MDmXq3JDw0FKFCw9ghkg+6YWq7ZA==",
"cpu": [
"arm64"
],
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">= 18"
}
},
"node_modules/@lancedb/lancedb-linux-x64-gnu": {
"version": "0.4.16",
"resolved": "https://registry.npmjs.org/@lancedb/lancedb-linux-x64-gnu/-/lancedb-linux-x64-gnu-0.4.16.tgz",
"integrity": "sha512-nKChybybi8uA0AFRHBFm7Fz3VXcRm8riv5Gs7xQsrsCtYxxf4DT/0BfUvQ0xKbwNJa+fawHRxi9BOQewdj49fg==",
"cpu": [
"x64"
],
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">= 18"
}
},
"node_modules/@lancedb/lancedb-win32-x64-msvc": {
"version": "0.4.16",
"resolved": "https://registry.npmjs.org/@lancedb/lancedb-win32-x64-msvc/-/lancedb-win32-x64-msvc-0.4.16.tgz",
"integrity": "sha512-KMeBPMpv2g+ZMVsHVibed7BydrBlxje1qS0bZTDrLw9BtZOk6XH2lh1mCDnCJI6sbAscUKNA6fDCdquhQPHL7w==",
"cpu": [
"x64"
],
"optional": true,
"os": [
"win32"
],
"engines": {
"node": ">= 18"
}
},
"node_modules/@napi-rs/cli": {
"version": "2.18.0",
"resolved": "https://registry.npmjs.org/@napi-rs/cli/-/cli-2.18.0.tgz",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb",
"version": "0.4.18",
"version": "0.4.17",
"main": "./dist/index.js",
"types": "./dist/index.d.ts",
"napi": {
@@ -62,14 +62,20 @@
"build-release": "npm run build:release && tsc -b && shx cp lancedb/native.d.ts dist/native.d.ts",
"chkformat": "prettier . --check",
"docs": "typedoc --plugin typedoc-plugin-markdown --out ../docs/src/js lancedb/index.ts",
"lint": "eslint lancedb __test__",
"lint-fix": "eslint lancedb __test__ --fix",
"lint": "eslint lancedb && eslint __test__",
"prepublishOnly": "napi prepublish -t npm",
"test": "npm run build && jest --verbose",
"integration": "S3_TEST=1 npm run test",
"universal": "napi universal",
"version": "napi version"
},
"optionalDependencies": {
"@lancedb/lancedb-darwin-arm64": "0.4.17",
"@lancedb/lancedb-darwin-x64": "0.4.17",
"@lancedb/lancedb-linux-arm64-gnu": "0.4.17",
"@lancedb/lancedb-linux-x64-gnu": "0.4.17",
"@lancedb/lancedb-win32-x64-msvc": "0.4.17"
},
"dependencies": {
"openai": "^4.29.2",
"apache-arrow": "^15.0.0"

View File

@@ -176,7 +176,6 @@ impl Connection {
&self,
name: String,
storage_options: Option<HashMap<String, String>>,
index_cache_size: Option<u32>,
) -> napi::Result<Table> {
let mut builder = self.get_inner()?.open_table(&name);
if let Some(storage_options) = storage_options {
@@ -184,9 +183,6 @@ impl Connection {
builder = builder.storage_option(key, value);
}
}
if let Some(index_cache_size) = index_cache_size {
builder = builder.index_cache_size(index_cache_size);
}
let tbl = builder
.execute()
.await

View File

@@ -1,5 +1,5 @@
[bumpversion]
current_version = 0.6.11
current_version = 0.6.9
commit = True
message = [python] Bump version: {current_version} → {new_version}
tag = True

View File

@@ -14,7 +14,7 @@ name = "_lancedb"
crate-type = ["cdylib"]
[dependencies]
arrow = { version = "51.0.0", features = ["pyarrow"] }
arrow = { version = "50.0.0", features = ["pyarrow"] }
lancedb = { path = "../rust/lancedb" }
env_logger = "0.10"
pyo3 = { version = "0.20", features = ["extension-module", "abi3-py38"] }

View File

@@ -1,6 +1,6 @@
[project]
name = "lancedb"
version = "0.6.11"
version = "0.6.9"
dependencies = [
"deprecation",
"pylance==0.10.12",

View File

@@ -224,23 +224,13 @@ class DBConnection(EnforceOverrides):
def __getitem__(self, name: str) -> LanceTable:
return self.open_table(name)
def open_table(self, name: str, *, index_cache_size: Optional[int] = None) -> Table:
def open_table(self, name: str) -> Table:
"""Open a Lance Table in the database.
Parameters
----------
name: str
The name of the table.
index_cache_size: int, default 256
Set the size of the index cache, specified as a number of entries
The exact meaning of an "entry" will depend on the type of index:
* IVF - there is one entry for each IVF partition
* BTREE - there is one entry for the entire index
This cache applies to the entire opened table, across all indices.
Setting this value higher will increase performance on larger datasets
at the expense of more RAM
Returns
-------
@@ -258,18 +248,6 @@ class DBConnection(EnforceOverrides):
"""
raise NotImplementedError
def rename_table(self, cur_name: str, new_name: str):
"""Rename a table in the database.
Parameters
----------
cur_name: str
The current name of the table.
new_name: str
The new name of the table.
"""
raise NotImplementedError
def drop_database(self):
"""
Drop database
@@ -429,9 +407,7 @@ class LanceDBConnection(DBConnection):
return tbl
@override
def open_table(
self, name: str, *, index_cache_size: Optional[int] = None
) -> LanceTable:
def open_table(self, name: str) -> LanceTable:
"""Open a table in the database.
Parameters
@@ -443,7 +419,7 @@ class LanceDBConnection(DBConnection):
-------
A LanceTable object representing the table.
"""
return LanceTable.open(self, name, index_cache_size=index_cache_size)
return LanceTable.open(self, name)
@override
def drop_table(self, name: str, ignore_missing: bool = False):
@@ -775,10 +751,7 @@ class AsyncConnection(object):
return AsyncTable(new_table)
async def open_table(
self,
name: str,
storage_options: Optional[Dict[str, str]] = None,
index_cache_size: Optional[int] = None,
self, name: str, storage_options: Optional[Dict[str, str]] = None
) -> Table:
"""Open a Lance Table in the database.
@@ -791,22 +764,12 @@ class AsyncConnection(object):
connection will be inherited by the table, but can be overridden here.
See available options at
https://lancedb.github.io/lancedb/guides/storage/
index_cache_size: int, default 256
Set the size of the index cache, specified as a number of entries
The exact meaning of an "entry" will depend on the type of index:
* IVF - there is one entry for each IVF partition
* BTREE - there is one entry for the entire index
This cache applies to the entire opened table, across all indices.
Setting this value higher will increase performance on larger datasets
at the expense of more RAM
Returns
-------
A LanceTable object representing the table.
"""
table = await self._inner.open_table(name, storage_options, index_cache_size)
table = await self._inner.open_table(name, storage_options)
return AsyncTable(table)
async def drop_table(self, name: str):

View File

@@ -94,7 +94,7 @@ class RemoteDBConnection(DBConnection):
yield item
@override
def open_table(self, name: str, *, index_cache_size: Optional[int] = None) -> Table:
def open_table(self, name: str) -> Table:
"""Open a Lance Table in the database.
Parameters
@@ -110,12 +110,6 @@ class RemoteDBConnection(DBConnection):
self._client.mount_retry_adapter_for_table(name)
if index_cache_size is not None:
logging.info(
"index_cache_size is ignored in LanceDb Cloud"
" (there is no local cache to configure)"
)
# check if table exists
if self._table_cache.get(name) is None:
self._client.post(f"/v1/table/{name}/describe/")
@@ -287,24 +281,6 @@ class RemoteDBConnection(DBConnection):
)
self._table_cache.pop(name)
@override
def rename_table(self, cur_name: str, new_name: str):
"""Rename a table in the database.
Parameters
----------
cur_name: str
The current name of the table.
new_name: str
The new name of the table.
"""
self._client.post(
f"/v1/table/{cur_name}/rename/",
json={"new_table_name": new_name},
)
self._table_cache.pop(cur_name)
self._table_cache[new_name] = True
async def close(self):
"""Close the connection to the database."""
self._client.close()

View File

@@ -72,7 +72,7 @@ class RemoteTable(Table):
return resp
def index_stats(self, index_uuid: str):
"""List all the stats of a specified index"""
"""List all the indices on the table"""
resp = self._conn._client.post(
f"/v1/table/{self._name}/index/{index_uuid}/stats/"
)
@@ -485,6 +485,64 @@ class RemoteTable(Table):
payload = {"predicate": where, "updates": updates}
self._conn._client.post(f"/v1/table/{self._name}/update/", data=payload)
def checkout(self, version: int):
"""Checkout a version of the table. This is an in-place operation.
This allows viewing previous versions of the table. If you wish to
keep writing to the dataset starting from an old version, then use
the `restore` function.
Calling this method will set the table into time-travel mode. If you
wish to return to standard mode, call `checkout_latest`.
Parameters
----------
version : int
The version to checkout.
Examples ??? to be changed
--------
>>> import lancedb
>>> data = [{"vector": [1.1, 0.9], "type": "vector"}]
>>> db = lancedb.connect("db://...", api_key="...", # doctest: +SKIP
... region="...") # doctest: +SKIP
>>> table = db.create_table("my_table", data) # doctest: +SKIP
>>> table.version
2
>>> table.to_pandas()
vector type
0 [1.1, 0.9] vector
>>> table.add([{"vector": [0.5, 0.2], "type": "vector"}])
>>> table.version
3
>>> table.checkout(2)
>>> table.to_pandas()
vector type
0 [1.1, 0.9] vector
"""
def checkout_latest(self):
"""checkout_latest() is not yet supported on LanceDB cloud"""
raise NotImplementedError("checkout_latest() is not yet supported on LanceDB cloud")
def restore(self, version: int = None):
"""Restore a version of the table. This is an in-place operation.
This creates a new version where the data is equivalent to the
specified previous version. Data is not copied (as of python-v0.2.1).
Parameters
----------
version : int, default None
The version to restore. If unspecified then restores the currently
checked out version. If the currently checked out version is the
latest version then this is a no-op.
Examples
--------
>>> import lancedb
"""
max_version =
def cleanup_old_versions(self, *_):
"""cleanup_old_versions() is not supported on the LanceDB cloud"""

View File

@@ -806,7 +806,6 @@ class _LanceLatestDatasetRef(_LanceDatasetRef):
"""Reference to the latest version of a LanceDataset."""
uri: str
index_cache_size: Optional[int] = None
read_consistency_interval: Optional[timedelta] = None
last_consistency_check: Optional[float] = None
_dataset: Optional[LanceDataset] = None
@@ -814,9 +813,7 @@ class _LanceLatestDatasetRef(_LanceDatasetRef):
@property
def dataset(self) -> LanceDataset:
if not self._dataset:
self._dataset = lance.dataset(
self.uri, index_cache_size=self.index_cache_size
)
self._dataset = lance.dataset(self.uri)
self.last_consistency_check = time.monotonic()
elif self.read_consistency_interval is not None:
now = time.monotonic()
@@ -845,15 +842,12 @@ class _LanceLatestDatasetRef(_LanceDatasetRef):
class _LanceTimeTravelRef(_LanceDatasetRef):
uri: str
version: int
index_cache_size: Optional[int] = None
_dataset: Optional[LanceDataset] = None
@property
def dataset(self) -> LanceDataset:
if not self._dataset:
self._dataset = lance.dataset(
self.uri, version=self.version, index_cache_size=self.index_cache_size
)
self._dataset = lance.dataset(self.uri, version=self.version)
return self._dataset
@dataset.setter
@@ -890,8 +884,6 @@ class LanceTable(Table):
connection: "LanceDBConnection",
name: str,
version: Optional[int] = None,
*,
index_cache_size: Optional[int] = None,
):
self._conn = connection
self.name = name
@@ -900,13 +892,11 @@ class LanceTable(Table):
self._ref = _LanceTimeTravelRef(
uri=self._dataset_uri,
version=version,
index_cache_size=index_cache_size,
)
else:
self._ref = _LanceLatestDatasetRef(
uri=self._dataset_uri,
read_consistency_interval=connection.read_consistency_interval,
index_cache_size=index_cache_size,
)
@classmethod

View File

@@ -368,15 +368,6 @@ async def test_create_exist_ok_async(tmp_path):
# await db.create_table("test", schema=bad_schema, exist_ok=True)
def test_open_table_sync(tmp_path):
db = lancedb.connect(tmp_path)
db.create_table("test", data=[{"id": 0}])
assert db.open_table("test").count_rows() == 1
assert db.open_table("test", index_cache_size=0).count_rows() == 1
with pytest.raises(FileNotFoundError, match="does not exist"):
db.open_table("does_not_exist")
@pytest.mark.asyncio
async def test_open_table(tmp_path):
db = await lancedb.connect_async(tmp_path)
@@ -406,10 +397,6 @@ async def test_open_table(tmp_path):
}
)
# No way to verify this yet, but at least make sure we
# can pass the parameter
await db.open_table("test", index_cache_size=0)
with pytest.raises(ValueError, match="was not found"):
await db.open_table("does_not_exist")

View File

@@ -134,21 +134,17 @@ impl Connection {
})
}
#[pyo3(signature = (name, storage_options = None, index_cache_size = None))]
#[pyo3(signature = (name, storage_options = None))]
pub fn open_table(
self_: PyRef<'_, Self>,
name: String,
storage_options: Option<HashMap<String, String>>,
index_cache_size: Option<u32>,
) -> PyResult<&PyAny> {
let inner = self_.get_inner()?.clone();
let mut builder = inner.open_table(name);
if let Some(storage_options) = storage_options {
builder = builder.storage_options(storage_options);
}
if let Some(index_cache_size) = index_cache_size {
builder = builder.index_cache_size(index_cache_size);
}
future_into_py(self_.py(), async move {
let table = builder.execute().await.infer_error()?;
Ok(Table::new(table))

View File

@@ -1,6 +1,6 @@
[package]
name = "lancedb-node"
version = "0.4.18"
version = "0.4.17"
description = "Serverless, low-latency vector database for AI applications"
license.workspace = true
edition.workspace = true

View File

@@ -1,6 +1,6 @@
[package]
name = "lancedb"
version = "0.4.18"
version = "0.4.17"
edition.workspace = true
description = "LanceDB: A serverless, low-latency vector database for AI applications"
license.workspace = true
@@ -52,7 +52,7 @@ aws-sdk-kms = { version = "1.0" }
aws-config = { version = "1.0" }
[features]
default = []
default = ["remote"]
remote = ["dep:reqwest"]
fp16kernels = ["lance-linalg/fp16kernels"]
s3-test = []
s3-test = []

View File

@@ -33,9 +33,6 @@ use crate::table::{NativeTable, WriteOptions};
use crate::utils::validate_table_name;
use crate::Table;
#[cfg(feature = "remote")]
use log::warn;
pub const LANCE_FILE_EXTENSION: &str = "lance";
pub type TableBuilderCallback = Box<dyn FnOnce(OpenTableBuilder) -> OpenTableBuilder + Send>;
@@ -582,7 +579,6 @@ impl ConnectBuilder {
let api_key = self.api_key.ok_or_else(|| Error::InvalidInput {
message: "An api_key is required when connecting to LanceDb Cloud".to_string(),
})?;
warn!("The rust implementation of the remote client is not yet ready for use.");
let internal = Arc::new(crate::remote::db::RemoteDatabase::try_new(
&self.uri,
&api_key,
@@ -913,23 +909,12 @@ impl ConnectionInternal for Database {
}
}
// Some ReadParams are exposed in the OpenTableBuilder, but we also
// let the user provide their own ReadParams.
//
// If we have a user provided ReadParams use that
// If we don't then start with the default ReadParams and customize it with
// the options from the OpenTableBuilder
let read_params = options.lance_read_params.unwrap_or_else(|| ReadParams {
index_cache_size: options.index_cache_size as usize,
..Default::default()
});
let native_table = Arc::new(
NativeTable::open_with_params(
&table_uri,
&options.name,
self.store_wrapper.clone(),
Some(read_params),
options.lance_read_params,
self.read_consistency_interval,
)
.await?,
@@ -1047,6 +1032,7 @@ mod tests {
}
#[tokio::test]
#[ignore = "this can't pass due to https://github.com/lancedb/lancedb/issues/1019, enable it after the bug fixed"]
async fn test_open_table() {
let tmp_dir = tempdir().unwrap();
let uri = tmp_dir.path().to_str().unwrap();

View File

@@ -46,18 +46,10 @@ impl VectorIndex {
}
}
#[derive(Debug, Deserialize)]
pub struct VectorIndexMetadata {
pub metric_type: String,
pub index_type: String,
}
#[derive(Debug, Deserialize)]
pub struct VectorIndexStatistics {
pub num_indexed_rows: usize,
pub num_unindexed_rows: usize,
pub index_type: String,
pub indices: Vec<VectorIndexMetadata>,
}
/// Builder for an IVF PQ index.

View File

@@ -350,16 +350,8 @@ mod test {
#[tokio::test]
async fn test_e2e() {
let dir1 = tempfile::tempdir()
.unwrap()
.into_path()
.canonicalize()
.unwrap();
let dir2 = tempfile::tempdir()
.unwrap()
.into_path()
.canonicalize()
.unwrap();
let dir1 = tempfile::tempdir().unwrap().into_path();
let dir2 = tempfile::tempdir().unwrap().into_path();
let secondary_store = LocalFileSystem::new_with_prefix(dir2.to_str().unwrap()).unwrap();
let object_store_wrapper = Arc::new(MirroringObjectStoreWrapper {

View File

@@ -34,16 +34,6 @@
//! cargo install lancedb
//! ```
//!
//! ## Crate Features
//!
//! ### Experimental Features
//!
//! These features are not enabled by default. They are experimental or in-development features that
//! are not yet ready to be released.
//!
//! - `remote` - Enable remote client to connect to LanceDB cloud. This is not yet fully implemented
//! and should not be enabled.
//!
//! ### Quick Start
//!
//! #### Connect to a database.

View File

@@ -87,16 +87,14 @@ impl ConnectionInternal for RemoteDatabase {
.await
.unwrap()?;
let rsp = self
.client
.post(&format!("/v1/table/{}/create/", options.name))
self.client
.post(&format!("/v1/table/{}/create", options.name))
.body(data_buffer)
.header(CONTENT_TYPE, ARROW_STREAM_CONTENT_TYPE)
// This is currently expected by LanceDb cloud but will be removed soon.
.header("x-request-id", "na")
.send()
.await?;
self.client.check_response(rsp).await?;
Ok(Table::new(Arc::new(RemoteTable::new(
self.client.clone(),

View File

@@ -1061,26 +1061,6 @@ impl NativeTable {
}
}
pub async fn get_index_type(&self, index_uuid: &str) -> Result<Option<String>> {
match self.load_index_stats(index_uuid).await? {
Some(stats) => Ok(Some(stats.index_type)),
None => Ok(None),
}
}
pub async fn get_distance_type(&self, index_uuid: &str) -> Result<Option<String>> {
match self.load_index_stats(index_uuid).await? {
Some(stats) => Ok(Some(
stats
.indices
.iter()
.map(|i| i.metric_type.clone())
.collect(),
)),
None => Ok(None),
}
}
pub async fn load_indices(&self) -> Result<Vec<VectorIndex>> {
let dataset = self.dataset.get().await?;
let (indices, mf) = futures::try_join!(dataset.load_indices(), dataset.latest_manifest())?;