Compare commits

...

22 Commits

Author SHA1 Message Date
rmeng
295be1ab1c relax half req 2024-05-02 13:46:31 -04:00
rmeng
91f980ec5d chore: upgrade to lance 0.10.18 2024-05-02 13:42:18 -04:00
Cory Grinstead
71323a064a chore(nodejs): update docs on "table.ts" (#1255)
closes https://github.com/lancedb/lancedb/issues/1007
2024-05-01 23:00:22 -05:00
asmith26
df48454b70 Update embedding_functions.md (#1250)
`clip.ndims` seems to be a function (I installed with `pip install
open_clip_torch`).
2024-05-01 09:33:42 -07:00
Lance Release
6603414885 Updating package-lock.json 2024-04-30 20:57:12 +00:00
Lance Release
c256f6c502 Updating package-lock.json 2024-04-30 19:58:49 +00:00
Lance Release
cc03f90379 Updating package-lock.json 2024-04-30 19:21:48 +00:00
Lance Release
975da09b02 Bump version: 0.4.17 → 0.4.18 2024-04-30 19:21:37 +00:00
Cory Grinstead
c32e17b497 chore(nodejs): remove "optionalDependencies" (#1252)
closes #1248 

the binding specific `optionalDependencies` are added automatically as
part of the `prepublishOnly` hook, and they are not supposed to be
committed to `package.json`.



--- 

npm lifecycle scripts: 
https://docs.npmjs.com/cli/v7/using-npm/scripts#life-cycle-scripts
2024-04-30 10:51:10 -05:00
Ryan Green
0528abdf97 fix: fix path on remote create_table and check for error response (#1244) 2024-04-28 11:33:05 -02:30
Lance Release
1090c311e8 [python] Bump version: 0.6.10 → 0.6.11 2024-04-27 03:54:58 +00:00
Weston Pace
e767cbb374 chore: update to Lance version 0.10.16 and Arrow version 51 (#1247) 2024-04-26 16:26:57 -07:00
Weston Pace
3d7c48feca feat: allow the index_cache_size to be configured when opening a table (#1245)
This was already configurable in the rust API but it wasn't actually
being passed down to the underlying dataset. I added this option to both
the async python API and the new nodejs API.

I also added this option to the synchronous python API.

I did not add the option to vectordb.
2024-04-26 13:42:02 -07:00
Bert
08d62550bb fix: passing data to createTable as option (#1242)
Fixes issue where we would throw `Either data or schema needs to
defined` when passing `data` to `createTable` as a property of the first
argument (an object).

```ts
await db.createTable({
  name: 'table1',
  data,
  schema
})
```
2024-04-26 15:26:08 -04:00
Lei Xu
b272408b05 chore: fix main branch test failure (#1240) 2024-04-24 13:49:37 -07:00
Weston Pace
46ffa87cd4 chore: disable the remote feature by default (#1239)
The rust implementation of the remote client is not yet ready. This is
understandably confusing for users since it is enabled by default. This
PR disables it by default. We can re-enable it when we are ready (even
then it is not clear this is something that should be a default
feature).

---------

Co-authored-by: Will Jones <willjones127@gmail.com>
2024-04-24 09:28:24 -07:00
QianZhu
cd9fc37b95 add rename_table fn and more data for index_stats to return (#1234)
1. added rename_table fn to enable dashboard to rename a table
2. added index_type and distance_type (for vector index) to index_stats
so that more detailed data can be shown on the table page.
2024-04-23 16:42:26 -07:00
Lance Release
431f94e564 [python] Bump version: 0.6.9 → 0.6.10 2024-04-22 17:42:24 +00:00
Alex Kohler
c1a7d65473 chore: fix get_registry call in baai embeddings example (#1230) 2024-04-20 07:25:16 +05:30
Rob Meng
1e5ccb1614 chore: upgrade lance to 0.10.15 (#1229) 2024-04-19 10:31:39 -04:00
Bert
2e7ab373dc fix: update lance to 0.10.13 (#1226) 2024-04-17 09:29:10 -04:00
Weston Pace
c7fbc4aaee docs: fix minor typo (#1220) 2024-04-14 03:32:57 +05:30
35 changed files with 344 additions and 195 deletions

View File

@@ -1,5 +1,5 @@
[bumpversion]
current_version = 0.4.17
current_version = 0.4.18
commit = True
message = Bump version: {current_version} → {new_version}
tag = True

View File

@@ -14,22 +14,22 @@ keywords = ["lancedb", "lance", "database", "vector", "search"]
categories = ["database-implementations"]
[workspace.dependencies]
lance = { "version" = "=0.10.12", "features" = ["dynamodb"] }
lance-index = { "version" = "=0.10.12" }
lance-linalg = { "version" = "=0.10.12" }
lance-testing = { "version" = "=0.10.12" }
lance = { "version" = "=0.10.18", "features" = ["dynamodb"] }
lance-index = { "version" = "=0.10.18" }
lance-linalg = { "version" = "=0.10.18" }
lance-testing = { "version" = "=0.10.18" }
# Note that this one does not include pyarrow
arrow = { version = "50.0", optional = false }
arrow-array = "50.0"
arrow-data = "50.0"
arrow-ipc = "50.0"
arrow-ord = "50.0"
arrow-schema = "50.0"
arrow-arith = "50.0"
arrow-cast = "50.0"
arrow = { version = "51.0", optional = false }
arrow-array = "51.0"
arrow-data = "51.0"
arrow-ipc = "51.0"
arrow-ord = "51.0"
arrow-schema = "51.0"
arrow-arith = "51.0"
arrow-cast = "51.0"
async-trait = "0"
chrono = "0.4.35"
half = { "version" = "=2.3.1", default-features = false, features = [
half = { "version" = "2.4.1", default-features = false, features = [
"num-traits",
] }
futures = "0"

View File

@@ -159,7 +159,7 @@ Allows you to set parameters when registering a `sentence-transformers` object.
from lancedb.embeddings import get_registry
db = lancedb.connect("/tmp/db")
model = get_registry.get("sentence-transformers").create(name="BAAI/bge-small-en-v1.5", device="cpu")
model = get_registry().get("sentence-transformers").create(name="BAAI/bge-small-en-v1.5", device="cpu")
class Words(LanceModel):
text: str = model.SourceField()

View File

@@ -46,7 +46,7 @@ For this purpose, LanceDB introduces an **embedding functions API**, that allow
```python
class Pets(LanceModel):
vector: Vector(clip.ndims) = clip.VectorField()
vector: Vector(clip.ndims()) = clip.VectorField()
image_uri: str = clip.SourceField()
```
@@ -149,7 +149,7 @@ You can also use the integration for adding utility operations in the schema. Fo
```python
class Pets(LanceModel):
vector: Vector(clip.ndims) = clip.VectorField()
vector: Vector(clip.ndims()) = clip.VectorField()
image_uri: str = clip.SourceField()
@property
@@ -166,4 +166,4 @@ rs[2].image
![](../assets/dog_clip_output.png)
Now that you have the basic idea about LanceDB embedding functions and the embedding function registry,
let's dive deeper into defining your own [custom functions](./custom_embedding_function.md).
let's dive deeper into defining your own [custom functions](./custom_embedding_function.md).

74
node/package-lock.json generated
View File

@@ -1,12 +1,12 @@
{
"name": "vectordb",
"version": "0.4.17",
"version": "0.4.18",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "vectordb",
"version": "0.4.17",
"version": "0.4.18",
"cpu": [
"x64",
"arm64"
@@ -52,11 +52,11 @@
"uuid": "^9.0.0"
},
"optionalDependencies": {
"@lancedb/vectordb-darwin-arm64": "0.4.17",
"@lancedb/vectordb-darwin-x64": "0.4.17",
"@lancedb/vectordb-linux-arm64-gnu": "0.4.17",
"@lancedb/vectordb-linux-x64-gnu": "0.4.17",
"@lancedb/vectordb-win32-x64-msvc": "0.4.17"
"@lancedb/vectordb-darwin-arm64": "0.4.18",
"@lancedb/vectordb-darwin-x64": "0.4.18",
"@lancedb/vectordb-linux-arm64-gnu": "0.4.18",
"@lancedb/vectordb-linux-x64-gnu": "0.4.18",
"@lancedb/vectordb-win32-x64-msvc": "0.4.18"
},
"peerDependencies": {
"@apache-arrow/ts": "^14.0.2",
@@ -333,6 +333,66 @@
"@jridgewell/sourcemap-codec": "^1.4.10"
}
},
"node_modules/@lancedb/vectordb-darwin-arm64": {
"version": "0.4.18",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.4.18.tgz",
"integrity": "sha512-CzJbkBKz30U0ocFFhRLV3ZPRZh3MtAkOmFr76jxRWeXLPM/JcLvhGOAnW9h/XdTONidHOfHNZnUtrjeWDMCyig==",
"cpu": [
"arm64"
],
"optional": true,
"os": [
"darwin"
]
},
"node_modules/@lancedb/vectordb-darwin-x64": {
"version": "0.4.18",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.4.18.tgz",
"integrity": "sha512-wyqpfdDBE5g+8SLN/6E/r37smt5i+4H3MVNZ2GZfvcMAd4xIZTwGAf5Mfx8j15t3mvKMiBEZPTvYQfFde2bQmA==",
"cpu": [
"x64"
],
"optional": true,
"os": [
"darwin"
]
},
"node_modules/@lancedb/vectordb-linux-arm64-gnu": {
"version": "0.4.18",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.4.18.tgz",
"integrity": "sha512-OfCjTrwfdmT9Qh5r92AUj7Wosvl8mSYADS6rp+ofNoht9nq1UqtlyrCot1RhuF5w6UW1aLCJXycXY0qHh1WUPw==",
"cpu": [
"arm64"
],
"optional": true,
"os": [
"linux"
]
},
"node_modules/@lancedb/vectordb-linux-x64-gnu": {
"version": "0.4.18",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.4.18.tgz",
"integrity": "sha512-uJiCminsZQ6oUvVYEElgN+/Lqd9646cJUWCbfiFSnt10PCj/kFBXWjKEuCxfG/A0bp6DTm5mU5RYWDfY9v3T0Q==",
"cpu": [
"x64"
],
"optional": true,
"os": [
"linux"
]
},
"node_modules/@lancedb/vectordb-win32-x64-msvc": {
"version": "0.4.18",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.4.18.tgz",
"integrity": "sha512-T9//HlvtNDHEfbIjz0ExLQFbqKFHRdaKf3jpFECt5oJdU0VCXe5460DIMvp6w/SDf24pb1UvOjZnmPLvX4yPNg==",
"cpu": [
"x64"
],
"optional": true,
"os": [
"win32"
]
},
"node_modules/@neon-rs/cli": {
"version": "0.0.160",
"resolved": "https://registry.npmjs.org/@neon-rs/cli/-/cli-0.0.160.tgz",

View File

@@ -1,6 +1,6 @@
{
"name": "vectordb",
"version": "0.4.17",
"version": "0.4.18",
"description": " Serverless, low-latency vector database for AI applications",
"main": "dist/index.js",
"types": "dist/index.d.ts",
@@ -88,10 +88,10 @@
}
},
"optionalDependencies": {
"@lancedb/vectordb-darwin-arm64": "0.4.17",
"@lancedb/vectordb-darwin-x64": "0.4.17",
"@lancedb/vectordb-linux-arm64-gnu": "0.4.17",
"@lancedb/vectordb-linux-x64-gnu": "0.4.17",
"@lancedb/vectordb-win32-x64-msvc": "0.4.17"
"@lancedb/vectordb-darwin-arm64": "0.4.18",
"@lancedb/vectordb-darwin-x64": "0.4.18",
"@lancedb/vectordb-linux-arm64-gnu": "0.4.18",
"@lancedb/vectordb-linux-x64-gnu": "0.4.18",
"@lancedb/vectordb-win32-x64-msvc": "0.4.18"
}
}

View File

@@ -163,7 +163,7 @@ export interface CreateTableOptions<T> {
/**
* Connect to a LanceDB instance at the given URI.
*
* Accpeted formats:
* Accepted formats:
*
* - `/path/to/database` - local database
* - `s3://bucket/path/to/database` or `gs://bucket/path/to/database` - database on cloud storage

View File

@@ -140,6 +140,9 @@ export class RemoteConnection implements Connection {
schema = nameOrOpts.schema
embeddings = nameOrOpts.embeddingFunction
tableName = nameOrOpts.name
if (data === undefined) {
data = nameOrOpts.data
}
}
let buffer: Buffer

1
nodejs/.gitignore vendored Normal file
View File

@@ -0,0 +1 @@
yarn.lock

View File

@@ -20,7 +20,7 @@ import { Table as ArrowTable, Schema } from "apache-arrow";
/**
* Connect to a LanceDB instance at the given URI.
*
* Accpeted formats:
* Accepted formats:
*
* - `/path/to/database` - local database
* - `s3://bucket/path/to/database` or `gs://bucket/path/to/database` - database on cloud storage
@@ -77,6 +77,18 @@ export interface OpenTableOptions {
* The available options are described at https://lancedb.github.io/lancedb/guides/storage/
*/
storageOptions?: Record<string, string>;
/**
* Set the size of the index cache, specified as a number of entries
*
* The exact meaning of an "entry" will depend on the type of index:
* - IVF: there is one entry for each IVF partition
* - BTREE: there is one entry for the entire index
*
* This cache applies to the entire opened table, across all indices.
* Setting this value higher will increase performance on larger datasets
* at the expense of more RAM
*/
indexCacheSize?: number;
}
export interface TableNamesOptions {
@@ -160,6 +172,7 @@ export class Connection {
const innerTable = await this.inner.openTable(
name,
cleanseStorageOptions(options?.storageOptions),
options?.indexCacheSize,
);
return new Table(innerTable);
}

View File

@@ -169,17 +169,20 @@ export class Table {
* // If the column has a vector (fixed size list) data type then
* // an IvfPq vector index will be created.
* const table = await conn.openTable("my_table");
* await table.createIndex(["vector"]);
* await table.createIndex("vector");
* @example
* // For advanced control over vector index creation you can specify
* // the index type and options.
* const table = await conn.openTable("my_table");
* await table.createIndex(["vector"], I)
* .ivf_pq({ num_partitions: 128, num_sub_vectors: 16 })
* .build();
* await table.createIndex("vector", {
* config: lancedb.Index.ivfPq({
* numPartitions: 128,
* numSubVectors: 16,
* }),
* });
* @example
* // Or create a Scalar index
* await table.createIndex("my_float_col").build();
* await table.createIndex("my_float_col");
*/
async createIndex(column: string, options?: Partial<IndexOptions>) {
// Bit of a hack to get around the fact that TS has no package-scope.
@@ -197,8 +200,7 @@ export class Table {
* vector similarity, sorting, and more.
*
* Note: By default, all columns are returned. For best performance, you should
* only fetch the columns you need. See [`Query::select_with_projection`] for
* more details.
* only fetch the columns you need.
*
* When appropriate, various indices and statistics based pruning will be used to
* accelerate the query.
@@ -206,10 +208,13 @@ export class Table {
* // SQL-style filtering
* //
* // This query will return up to 1000 rows whose value in the `id` column
* // is greater than 5. LanceDb supports a broad set of filtering functions.
* for await (const batch of table.query()
* .filter("id > 1").select(["id"]).limit(20)) {
* console.log(batch);
* // is greater than 5. LanceDb supports a broad set of filtering functions.
* for await (const batch of table
* .query()
* .where("id > 1")
* .select(["id"])
* .limit(20)) {
* console.log(batch);
* }
* @example
* // Vector Similarity Search
@@ -218,13 +223,14 @@ export class Table {
* // closest to the query vector [1.0, 2.0, 3.0]. If an index has been created
* // on the "vector" column then this will perform an ANN search.
* //
* // The `refine_factor` and `nprobes` methods are used to control the recall /
* // The `refineFactor` and `nprobes` methods are used to control the recall /
* // latency tradeoff of the search.
* for await (const batch of table.query()
* .nearestTo([1, 2, 3])
* .refineFactor(5).nprobe(10)
* .limit(10)) {
* console.log(batch);
* for await (const batch of table
* .query()
* .where("id > 1")
* .select(["id"])
* .limit(20)) {
* console.log(batch);
* }
* @example
* // Scan the full dataset
@@ -286,43 +292,45 @@ export class Table {
await this.inner.dropColumns(columnNames);
}
/**
* Retrieve the version of the table
*
* LanceDb supports versioning. Every operation that modifies the table increases
* version. As long as a version hasn't been deleted you can `[Self::checkout]` that
* version to view the data at that point. In addition, you can `[Self::restore]` the
* version to replace the current table with a previous version.
*/
/** Retrieve the version of the table */
async version(): Promise<number> {
return await this.inner.version();
}
/**
* Checks out a specific version of the Table
* Checks out a specific version of the table _This is an in-place operation._
*
* Any read operation on the table will now access the data at the checked out version.
* As a consequence, calling this method will disable any read consistency interval
* that was previously set.
* This allows viewing previous versions of the table. If you wish to
* keep writing to the dataset starting from an old version, then use
* the `restore` function.
*
* This is a read-only operation that turns the table into a sort of "view"
* or "detached head". Other table instances will not be affected. To make the change
* permanent you can use the `[Self::restore]` method.
* Calling this method will set the table into time-travel mode. If you
* wish to return to standard mode, call `checkoutLatest`.
* @param {number} version The version to checkout
* @example
* ```typescript
* import * as lancedb from "@lancedb/lancedb"
* const db = await lancedb.connect("./.lancedb");
* const table = await db.createTable("my_table", [
* { vector: [1.1, 0.9], type: "vector" },
* ]);
*
* Any operation that modifies the table will fail while the table is in a checked
* out state.
*
* To return the table to a normal state use `[Self::checkout_latest]`
* console.log(await table.version()); // 1
* console.log(table.display());
* await table.add([{ vector: [0.5, 0.2], type: "vector" }]);
* await table.checkout(1);
* console.log(await table.version()); // 2
* ```
*/
async checkout(version: number): Promise<void> {
await this.inner.checkout(version);
}
/**
* Ensures the table is pointing at the latest version
* Checkout the latest version of the table. _This is an in-place operation._
*
* This can be used to manually update a table when the read_consistency_interval is None
* It can also be used to undo a `[Self::checkout]` operation
* The table will be set back into standard mode, and will track the latest
* version of the table.
*/
async checkoutLatest(): Promise<void> {
await this.inner.checkoutLatest();
@@ -344,9 +352,7 @@ export class Table {
await this.inner.restore();
}
/**
* List all indices that have been created with Self::create_index
*/
/** List all indices that have been created with {@link Table.createIndex} */
async listIndices(): Promise<IndexConfig[]> {
return await this.inner.listIndices();
}

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-darwin-arm64",
"version": "0.4.17",
"version": "0.4.18",
"os": [
"darwin"
],

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-darwin-x64",
"version": "0.4.17",
"version": "0.4.18",
"os": [
"darwin"
],

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-arm64-gnu",
"version": "0.4.17",
"version": "0.4.18",
"os": [
"linux"
],

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-x64-gnu",
"version": "0.4.17",
"version": "0.4.18",
"os": [
"linux"
],

View File

@@ -1,12 +1,12 @@
{
"name": "@lancedb/lancedb",
"version": "0.4.16",
"version": "0.4.18",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "@lancedb/lancedb",
"version": "0.4.16",
"version": "0.4.18",
"cpu": [
"x64",
"arm64"
@@ -45,13 +45,6 @@
},
"engines": {
"node": ">= 18"
},
"optionalDependencies": {
"@lancedb/lancedb-darwin-arm64": "0.4.16",
"@lancedb/lancedb-darwin-x64": "0.4.16",
"@lancedb/lancedb-linux-arm64-gnu": "0.4.16",
"@lancedb/lancedb-linux-x64-gnu": "0.4.16",
"@lancedb/lancedb-win32-x64-msvc": "0.4.16"
}
},
"node_modules/@75lb/deep-merge": {
@@ -2221,81 +2214,6 @@
"@jridgewell/sourcemap-codec": "^1.4.14"
}
},
"node_modules/@lancedb/lancedb-darwin-arm64": {
"version": "0.4.16",
"resolved": "https://registry.npmjs.org/@lancedb/lancedb-darwin-arm64/-/lancedb-darwin-arm64-0.4.16.tgz",
"integrity": "sha512-CV65ouIDQbBSNtdHbQSr2fqXflOuqud1cfweUS+EiK7eEOEYl7nO2oiFYO49Jy76MEwZxiP99hW825aCqIQJqg==",
"cpu": [
"arm64"
],
"optional": true,
"os": [
"darwin"
],
"engines": {
"node": ">= 18"
}
},
"node_modules/@lancedb/lancedb-darwin-x64": {
"version": "0.4.16",
"resolved": "https://registry.npmjs.org/@lancedb/lancedb-darwin-x64/-/lancedb-darwin-x64-0.4.16.tgz",
"integrity": "sha512-1CwIYCNdbFmV7fvqM+qUxbYgwxx0slcCV48PC/I19Ejitgtzw/NJiWDCvONhaLqG85lWNZm1xYceRpVv7b8seQ==",
"cpu": [
"x64"
],
"optional": true,
"os": [
"darwin"
],
"engines": {
"node": ">= 18"
}
},
"node_modules/@lancedb/lancedb-linux-arm64-gnu": {
"version": "0.4.16",
"resolved": "https://registry.npmjs.org/@lancedb/lancedb-linux-arm64-gnu/-/lancedb-linux-arm64-gnu-0.4.16.tgz",
"integrity": "sha512-CzLEbzoHKS6jV0k52YnvsiVNx0VzLp1Vz/zmbHI6HmB/XbS67qDO93Jk71MDmXq3JDw0FKFCw9ghkg+6YWq7ZA==",
"cpu": [
"arm64"
],
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">= 18"
}
},
"node_modules/@lancedb/lancedb-linux-x64-gnu": {
"version": "0.4.16",
"resolved": "https://registry.npmjs.org/@lancedb/lancedb-linux-x64-gnu/-/lancedb-linux-x64-gnu-0.4.16.tgz",
"integrity": "sha512-nKChybybi8uA0AFRHBFm7Fz3VXcRm8riv5Gs7xQsrsCtYxxf4DT/0BfUvQ0xKbwNJa+fawHRxi9BOQewdj49fg==",
"cpu": [
"x64"
],
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">= 18"
}
},
"node_modules/@lancedb/lancedb-win32-x64-msvc": {
"version": "0.4.16",
"resolved": "https://registry.npmjs.org/@lancedb/lancedb-win32-x64-msvc/-/lancedb-win32-x64-msvc-0.4.16.tgz",
"integrity": "sha512-KMeBPMpv2g+ZMVsHVibed7BydrBlxje1qS0bZTDrLw9BtZOk6XH2lh1mCDnCJI6sbAscUKNA6fDCdquhQPHL7w==",
"cpu": [
"x64"
],
"optional": true,
"os": [
"win32"
],
"engines": {
"node": ">= 18"
}
},
"node_modules/@napi-rs/cli": {
"version": "2.18.0",
"resolved": "https://registry.npmjs.org/@napi-rs/cli/-/cli-2.18.0.tgz",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb",
"version": "0.4.17",
"version": "0.4.18",
"main": "./dist/index.js",
"types": "./dist/index.d.ts",
"napi": {
@@ -62,20 +62,14 @@
"build-release": "npm run build:release && tsc -b && shx cp lancedb/native.d.ts dist/native.d.ts",
"chkformat": "prettier . --check",
"docs": "typedoc --plugin typedoc-plugin-markdown --out ../docs/src/js lancedb/index.ts",
"lint": "eslint lancedb && eslint __test__",
"lint": "eslint lancedb __test__",
"lint-fix": "eslint lancedb __test__ --fix",
"prepublishOnly": "napi prepublish -t npm",
"test": "npm run build && jest --verbose",
"integration": "S3_TEST=1 npm run test",
"universal": "napi universal",
"version": "napi version"
},
"optionalDependencies": {
"@lancedb/lancedb-darwin-arm64": "0.4.17",
"@lancedb/lancedb-darwin-x64": "0.4.17",
"@lancedb/lancedb-linux-arm64-gnu": "0.4.17",
"@lancedb/lancedb-linux-x64-gnu": "0.4.17",
"@lancedb/lancedb-win32-x64-msvc": "0.4.17"
},
"dependencies": {
"openai": "^4.29.2",
"apache-arrow": "^15.0.0"

View File

@@ -176,6 +176,7 @@ impl Connection {
&self,
name: String,
storage_options: Option<HashMap<String, String>>,
index_cache_size: Option<u32>,
) -> napi::Result<Table> {
let mut builder = self.get_inner()?.open_table(&name);
if let Some(storage_options) = storage_options {
@@ -183,6 +184,9 @@ impl Connection {
builder = builder.storage_option(key, value);
}
}
if let Some(index_cache_size) = index_cache_size {
builder = builder.index_cache_size(index_cache_size);
}
let tbl = builder
.execute()
.await

View File

@@ -1,5 +1,5 @@
[bumpversion]
current_version = 0.6.9
current_version = 0.6.11
commit = True
message = [python] Bump version: {current_version} → {new_version}
tag = True

View File

@@ -14,7 +14,7 @@ name = "_lancedb"
crate-type = ["cdylib"]
[dependencies]
arrow = { version = "50.0.0", features = ["pyarrow"] }
arrow = { version = "51.0.0", features = ["pyarrow"] }
lancedb = { path = "../rust/lancedb" }
env_logger = "0.10"
pyo3 = { version = "0.20", features = ["extension-module", "abi3-py38"] }

View File

@@ -1,6 +1,6 @@
[project]
name = "lancedb"
version = "0.6.9"
version = "0.6.11"
dependencies = [
"deprecation",
"pylance==0.10.12",

View File

@@ -224,13 +224,23 @@ class DBConnection(EnforceOverrides):
def __getitem__(self, name: str) -> LanceTable:
return self.open_table(name)
def open_table(self, name: str) -> Table:
def open_table(self, name: str, *, index_cache_size: Optional[int] = None) -> Table:
"""Open a Lance Table in the database.
Parameters
----------
name: str
The name of the table.
index_cache_size: int, default 256
Set the size of the index cache, specified as a number of entries
The exact meaning of an "entry" will depend on the type of index:
* IVF - there is one entry for each IVF partition
* BTREE - there is one entry for the entire index
This cache applies to the entire opened table, across all indices.
Setting this value higher will increase performance on larger datasets
at the expense of more RAM
Returns
-------
@@ -248,6 +258,18 @@ class DBConnection(EnforceOverrides):
"""
raise NotImplementedError
def rename_table(self, cur_name: str, new_name: str):
"""Rename a table in the database.
Parameters
----------
cur_name: str
The current name of the table.
new_name: str
The new name of the table.
"""
raise NotImplementedError
def drop_database(self):
"""
Drop database
@@ -407,7 +429,9 @@ class LanceDBConnection(DBConnection):
return tbl
@override
def open_table(self, name: str) -> LanceTable:
def open_table(
self, name: str, *, index_cache_size: Optional[int] = None
) -> LanceTable:
"""Open a table in the database.
Parameters
@@ -419,7 +443,7 @@ class LanceDBConnection(DBConnection):
-------
A LanceTable object representing the table.
"""
return LanceTable.open(self, name)
return LanceTable.open(self, name, index_cache_size=index_cache_size)
@override
def drop_table(self, name: str, ignore_missing: bool = False):
@@ -751,7 +775,10 @@ class AsyncConnection(object):
return AsyncTable(new_table)
async def open_table(
self, name: str, storage_options: Optional[Dict[str, str]] = None
self,
name: str,
storage_options: Optional[Dict[str, str]] = None,
index_cache_size: Optional[int] = None,
) -> Table:
"""Open a Lance Table in the database.
@@ -764,12 +791,22 @@ class AsyncConnection(object):
connection will be inherited by the table, but can be overridden here.
See available options at
https://lancedb.github.io/lancedb/guides/storage/
index_cache_size: int, default 256
Set the size of the index cache, specified as a number of entries
The exact meaning of an "entry" will depend on the type of index:
* IVF - there is one entry for each IVF partition
* BTREE - there is one entry for the entire index
This cache applies to the entire opened table, across all indices.
Setting this value higher will increase performance on larger datasets
at the expense of more RAM
Returns
-------
A LanceTable object representing the table.
"""
table = await self._inner.open_table(name, storage_options)
table = await self._inner.open_table(name, storage_options, index_cache_size)
return AsyncTable(table)
async def drop_table(self, name: str):

View File

@@ -94,7 +94,7 @@ class RemoteDBConnection(DBConnection):
yield item
@override
def open_table(self, name: str) -> Table:
def open_table(self, name: str, *, index_cache_size: Optional[int] = None) -> Table:
"""Open a Lance Table in the database.
Parameters
@@ -110,6 +110,12 @@ class RemoteDBConnection(DBConnection):
self._client.mount_retry_adapter_for_table(name)
if index_cache_size is not None:
logging.info(
"index_cache_size is ignored in LanceDb Cloud"
" (there is no local cache to configure)"
)
# check if table exists
if self._table_cache.get(name) is None:
self._client.post(f"/v1/table/{name}/describe/")
@@ -281,6 +287,24 @@ class RemoteDBConnection(DBConnection):
)
self._table_cache.pop(name)
@override
def rename_table(self, cur_name: str, new_name: str):
"""Rename a table in the database.
Parameters
----------
cur_name: str
The current name of the table.
new_name: str
The new name of the table.
"""
self._client.post(
f"/v1/table/{cur_name}/rename/",
json={"new_table_name": new_name},
)
self._table_cache.pop(cur_name)
self._table_cache[new_name] = True
async def close(self):
"""Close the connection to the database."""
self._client.close()

View File

@@ -72,7 +72,7 @@ class RemoteTable(Table):
return resp
def index_stats(self, index_uuid: str):
"""List all the indices on the table"""
"""List all the stats of a specified index"""
resp = self._conn._client.post(
f"/v1/table/{self._name}/index/{index_uuid}/stats/"
)

View File

@@ -806,6 +806,7 @@ class _LanceLatestDatasetRef(_LanceDatasetRef):
"""Reference to the latest version of a LanceDataset."""
uri: str
index_cache_size: Optional[int] = None
read_consistency_interval: Optional[timedelta] = None
last_consistency_check: Optional[float] = None
_dataset: Optional[LanceDataset] = None
@@ -813,7 +814,9 @@ class _LanceLatestDatasetRef(_LanceDatasetRef):
@property
def dataset(self) -> LanceDataset:
if not self._dataset:
self._dataset = lance.dataset(self.uri)
self._dataset = lance.dataset(
self.uri, index_cache_size=self.index_cache_size
)
self.last_consistency_check = time.monotonic()
elif self.read_consistency_interval is not None:
now = time.monotonic()
@@ -842,12 +845,15 @@ class _LanceLatestDatasetRef(_LanceDatasetRef):
class _LanceTimeTravelRef(_LanceDatasetRef):
uri: str
version: int
index_cache_size: Optional[int] = None
_dataset: Optional[LanceDataset] = None
@property
def dataset(self) -> LanceDataset:
if not self._dataset:
self._dataset = lance.dataset(self.uri, version=self.version)
self._dataset = lance.dataset(
self.uri, version=self.version, index_cache_size=self.index_cache_size
)
return self._dataset
@dataset.setter
@@ -884,6 +890,8 @@ class LanceTable(Table):
connection: "LanceDBConnection",
name: str,
version: Optional[int] = None,
*,
index_cache_size: Optional[int] = None,
):
self._conn = connection
self.name = name
@@ -892,11 +900,13 @@ class LanceTable(Table):
self._ref = _LanceTimeTravelRef(
uri=self._dataset_uri,
version=version,
index_cache_size=index_cache_size,
)
else:
self._ref = _LanceLatestDatasetRef(
uri=self._dataset_uri,
read_consistency_interval=connection.read_consistency_interval,
index_cache_size=index_cache_size,
)
@classmethod

View File

@@ -368,6 +368,15 @@ async def test_create_exist_ok_async(tmp_path):
# await db.create_table("test", schema=bad_schema, exist_ok=True)
def test_open_table_sync(tmp_path):
db = lancedb.connect(tmp_path)
db.create_table("test", data=[{"id": 0}])
assert db.open_table("test").count_rows() == 1
assert db.open_table("test", index_cache_size=0).count_rows() == 1
with pytest.raises(FileNotFoundError, match="does not exist"):
db.open_table("does_not_exist")
@pytest.mark.asyncio
async def test_open_table(tmp_path):
db = await lancedb.connect_async(tmp_path)
@@ -397,6 +406,10 @@ async def test_open_table(tmp_path):
}
)
# No way to verify this yet, but at least make sure we
# can pass the parameter
await db.open_table("test", index_cache_size=0)
with pytest.raises(ValueError, match="was not found"):
await db.open_table("does_not_exist")

View File

@@ -134,17 +134,21 @@ impl Connection {
})
}
#[pyo3(signature = (name, storage_options = None))]
#[pyo3(signature = (name, storage_options = None, index_cache_size = None))]
pub fn open_table(
self_: PyRef<'_, Self>,
name: String,
storage_options: Option<HashMap<String, String>>,
index_cache_size: Option<u32>,
) -> PyResult<&PyAny> {
let inner = self_.get_inner()?.clone();
let mut builder = inner.open_table(name);
if let Some(storage_options) = storage_options {
builder = builder.storage_options(storage_options);
}
if let Some(index_cache_size) = index_cache_size {
builder = builder.index_cache_size(index_cache_size);
}
future_into_py(self_.py(), async move {
let table = builder.execute().await.infer_error()?;
Ok(Table::new(table))

View File

@@ -1,6 +1,6 @@
[package]
name = "lancedb-node"
version = "0.4.17"
version = "0.4.18"
description = "Serverless, low-latency vector database for AI applications"
license.workspace = true
edition.workspace = true

View File

@@ -1,6 +1,6 @@
[package]
name = "lancedb"
version = "0.4.17"
version = "0.4.18"
edition.workspace = true
description = "LanceDB: A serverless, low-latency vector database for AI applications"
license.workspace = true
@@ -52,7 +52,7 @@ aws-sdk-kms = { version = "1.0" }
aws-config = { version = "1.0" }
[features]
default = ["remote"]
default = []
remote = ["dep:reqwest"]
fp16kernels = ["lance-linalg/fp16kernels"]
s3-test = []
s3-test = []

View File

@@ -33,6 +33,9 @@ use crate::table::{NativeTable, WriteOptions};
use crate::utils::validate_table_name;
use crate::Table;
#[cfg(feature = "remote")]
use log::warn;
pub const LANCE_FILE_EXTENSION: &str = "lance";
pub type TableBuilderCallback = Box<dyn FnOnce(OpenTableBuilder) -> OpenTableBuilder + Send>;
@@ -579,6 +582,7 @@ impl ConnectBuilder {
let api_key = self.api_key.ok_or_else(|| Error::InvalidInput {
message: "An api_key is required when connecting to LanceDb Cloud".to_string(),
})?;
warn!("The rust implementation of the remote client is not yet ready for use.");
let internal = Arc::new(crate::remote::db::RemoteDatabase::try_new(
&self.uri,
&api_key,
@@ -909,12 +913,23 @@ impl ConnectionInternal for Database {
}
}
// Some ReadParams are exposed in the OpenTableBuilder, but we also
// let the user provide their own ReadParams.
//
// If we have a user provided ReadParams use that
// If we don't then start with the default ReadParams and customize it with
// the options from the OpenTableBuilder
let read_params = options.lance_read_params.unwrap_or_else(|| ReadParams {
index_cache_size: options.index_cache_size as usize,
..Default::default()
});
let native_table = Arc::new(
NativeTable::open_with_params(
&table_uri,
&options.name,
self.store_wrapper.clone(),
options.lance_read_params,
Some(read_params),
self.read_consistency_interval,
)
.await?,
@@ -1032,7 +1047,6 @@ mod tests {
}
#[tokio::test]
#[ignore = "this can't pass due to https://github.com/lancedb/lancedb/issues/1019, enable it after the bug fixed"]
async fn test_open_table() {
let tmp_dir = tempdir().unwrap();
let uri = tmp_dir.path().to_str().unwrap();

View File

@@ -46,10 +46,18 @@ impl VectorIndex {
}
}
#[derive(Debug, Deserialize)]
pub struct VectorIndexMetadata {
pub metric_type: String,
pub index_type: String,
}
#[derive(Debug, Deserialize)]
pub struct VectorIndexStatistics {
pub num_indexed_rows: usize,
pub num_unindexed_rows: usize,
pub index_type: String,
pub indices: Vec<VectorIndexMetadata>,
}
/// Builder for an IVF PQ index.

View File

@@ -350,8 +350,16 @@ mod test {
#[tokio::test]
async fn test_e2e() {
let dir1 = tempfile::tempdir().unwrap().into_path();
let dir2 = tempfile::tempdir().unwrap().into_path();
let dir1 = tempfile::tempdir()
.unwrap()
.into_path()
.canonicalize()
.unwrap();
let dir2 = tempfile::tempdir()
.unwrap()
.into_path()
.canonicalize()
.unwrap();
let secondary_store = LocalFileSystem::new_with_prefix(dir2.to_str().unwrap()).unwrap();
let object_store_wrapper = Arc::new(MirroringObjectStoreWrapper {

View File

@@ -34,6 +34,16 @@
//! cargo install lancedb
//! ```
//!
//! ## Crate Features
//!
//! ### Experimental Features
//!
//! These features are not enabled by default. They are experimental or in-development features that
//! are not yet ready to be released.
//!
//! - `remote` - Enable remote client to connect to LanceDB cloud. This is not yet fully implemented
//! and should not be enabled.
//!
//! ### Quick Start
//!
//! #### Connect to a database.

View File

@@ -87,14 +87,16 @@ impl ConnectionInternal for RemoteDatabase {
.await
.unwrap()?;
self.client
.post(&format!("/v1/table/{}/create", options.name))
let rsp = self
.client
.post(&format!("/v1/table/{}/create/", options.name))
.body(data_buffer)
.header(CONTENT_TYPE, ARROW_STREAM_CONTENT_TYPE)
// This is currently expected by LanceDb cloud but will be removed soon.
.header("x-request-id", "na")
.send()
.await?;
self.client.check_response(rsp).await?;
Ok(Table::new(Arc::new(RemoteTable::new(
self.client.clone(),

View File

@@ -1061,6 +1061,26 @@ impl NativeTable {
}
}
pub async fn get_index_type(&self, index_uuid: &str) -> Result<Option<String>> {
match self.load_index_stats(index_uuid).await? {
Some(stats) => Ok(Some(stats.index_type)),
None => Ok(None),
}
}
pub async fn get_distance_type(&self, index_uuid: &str) -> Result<Option<String>> {
match self.load_index_stats(index_uuid).await? {
Some(stats) => Ok(Some(
stats
.indices
.iter()
.map(|i| i.metric_type.clone())
.collect(),
)),
None => Ok(None),
}
}
pub async fn load_indices(&self) -> Result<Vec<VectorIndex>> {
let dataset = self.dataset.get().await?;
let (indices, mf) = futures::try_join!(dataset.load_indices(), dataset.latest_manifest())?;