diff --git a/Cargo.toml b/Cargo.toml
index 8448fd86..3c325441 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -5,9 +5,9 @@ exclude = ["python"]
resolver = "2"
[workspace.dependencies]
-lance = { "version" = "=0.8.1", "features" = ["dynamodb"] }
-lance-linalg = { "version" = "=0.8.1" }
-lance-testing = { "version" = "=0.8.1" }
+lance = { "version" = "=0.8.3", "features" = ["dynamodb"] }
+lance-linalg = { "version" = "=0.8.3" }
+lance-testing = { "version" = "=0.8.3" }
# Note that this one does not include pyarrow
arrow = { version = "43.0.0", optional = false }
arrow-array = "43.0"
diff --git a/README.md b/README.md
index 268f3041..3819a61c 100644
--- a/README.md
+++ b/README.md
@@ -1,78 +1,80 @@
-
-
-
-
-LanceDB is an open-source database for vector-search built with persistent storage, which greatly simplifies retrevial, filtering and management of embeddings.
-
-The key features of LanceDB include:
-
-* Production-scale vector search with no servers to manage.
-
-* Store, query and filter vectors, metadata and multi-modal data (text, images, videos, point clouds, and more).
-
-* Support for vector similarity search, full-text search and SQL.
-
-* Native Python and Javascript/Typescript support.
-
-* Zero-copy, automatic versioning, manage versions of your data without needing extra infrastructure.
-
-* Ecosystem integrations with [LangChain 🦜️🔗](https://python.langchain.com/en/latest/modules/indexes/vectorstores/examples/lanecdb.html), [LlamaIndex 🦙](https://gpt-index.readthedocs.io/en/latest/examples/vector_stores/LanceDBIndexDemo.html), Apache-Arrow, Pandas, Polars, DuckDB and more on the way.
-
-LanceDB's core is written in Rust 🦀 and is built using Lance, an open-source columnar format designed for performant ML workloads.
-
-## Quick Start
-
-**Javascript**
-```shell
-npm install vectordb
-```
-
-```javascript
-const lancedb = require('vectordb');
-const db = await lancedb.connect('data/sample-lancedb');
-
-const table = await db.createTable('vectors',
- [{ id: 1, vector: [0.1, 0.2], item: "foo", price: 10 },
- { id: 2, vector: [1.1, 1.2], item: "bar", price: 50 }])
-
-const query = table.search([0.1, 0.3]);
-query.limit = 20;
-const results = await query.execute();
-```
-
-**Python**
-```shell
-pip install lancedb
-```
-
-```python
-import lancedb
-
-uri = "data/sample-lancedb"
-db = lancedb.connect(uri)
-table = db.create_table("my_table",
- data=[{"vector": [3.1, 4.1], "item": "foo", "price": 10.0},
- {"vector": [5.9, 26.5], "item": "bar", "price": 20.0}])
-result = table.search([100, 100]).limit(2).to_df()
-```
-
-## Blogs, Tutorials & Videos
-* 📈 2000x better performance with Lance over Parquet
-* 🤖 Build a question and answer bot with LanceDB
+
+
+
+
+LanceDB is an open-source database for vector-search built with persistent storage, which greatly simplifies retrieval, filtering and management of embeddings.
+
+The key features of LanceDB include:
+
+* Production-scale vector search with no servers to manage.
+
+* Store, query and filter vectors, metadata and multi-modal data (text, images, videos, point clouds, and more).
+
+* Support for vector similarity search, full-text search and SQL.
+
+* Native Python and Javascript/Typescript support.
+
+* Zero-copy, automatic versioning, manage versions of your data without needing extra infrastructure.
+
+* GPU support for building vector indexes (experimental).
+
+* Ecosystem integrations with [LangChain 🦜️🔗](https://python.langchain.com/en/latest/modules/indexes/vectorstores/examples/lanecdb.html), [LlamaIndex 🦙](https://gpt-index.readthedocs.io/en/latest/examples/vector_stores/LanceDBIndexDemo.html), Apache-Arrow, Pandas, Polars, DuckDB and more on the way.
+
+LanceDB's core is written in Rust 🦀 and is built using Lance, an open-source columnar format designed for performant ML workloads.
+
+## Quick Start
+
+**Javascript**
+```shell
+npm install vectordb
+```
+
+```javascript
+const lancedb = require('vectordb');
+const db = await lancedb.connect('data/sample-lancedb');
+
+const table = await db.createTable('vectors',
+ [{ id: 1, vector: [0.1, 0.2], item: "foo", price: 10 },
+ { id: 2, vector: [1.1, 1.2], item: "bar", price: 50 }])
+
+const query = table.search([0.1, 0.3]);
+query.limit = 20;
+const results = await query.execute();
+```
+
+**Python**
+```shell
+pip install lancedb
+```
+
+```python
+import lancedb
+
+uri = "data/sample-lancedb"
+db = lancedb.connect(uri)
+table = db.create_table("my_table",
+ data=[{"vector": [3.1, 4.1], "item": "foo", "price": 10.0},
+ {"vector": [5.9, 26.5], "item": "bar", "price": 20.0}])
+result = table.search([100, 100]).limit(2).to_df()
+```
+
+## Blogs, Tutorials & Videos
+* 📈 2000x better performance with Lance over Parquet
+* 🤖 Build a question and answer bot with LanceDB
diff --git a/docs/src/ann_indexes.md b/docs/src/ann_indexes.md
index bf0eb547..aa4e6d0b 100644
--- a/docs/src/ann_indexes.md
+++ b/docs/src/ann_indexes.md
@@ -68,6 +68,12 @@ a single PQ code.
IVF_PQ index with num_partitions=2, num_sub_vectors=4
+### Use GPU to build vector index
+
+The Lance Python SDK has experimental GPU support for creating IVF indexes.
+You can specify the GPU device used to train IVF partitions via the following parameter:
+
+- **accelerator**: Set to `"cuda"` to enable GPU training.
## Querying an ANN Index
diff --git a/docs/test/requirements.txt b/docs/test/requirements.txt
index c33c14e3..f61cbb9a 100644
--- a/docs/test/requirements.txt
+++ b/docs/test/requirements.txt
@@ -1,5 +1,8 @@
-lancedb @ git+https://github.com/lancedb/lancedb.git#egg=subdir&subdirectory=python
+-e ../../python
numpy
pandas
pylance
-duckdb
\ No newline at end of file
+duckdb
+--extra-index-url https://download.pytorch.org/whl/cpu
+torch
+
diff --git a/python/lancedb/remote/table.py b/python/lancedb/remote/table.py
index 981a1696..8e51234d 100644
--- a/python/lancedb/remote/table.py
+++ b/python/lancedb/remote/table.py
@@ -13,7 +13,7 @@
import uuid
from functools import cached_property
-from typing import Union
+from typing import Optional, Union
import pyarrow as pa
from lance import json_to_schema
@@ -62,6 +62,7 @@ class RemoteTable(Table):
num_sub_vectors=96,
vector_column_name: str = VECTOR_COLUMN_NAME,
replace: bool = True,
+ accelerator: Optional[str] = None,
):
raise NotImplementedError
diff --git a/python/lancedb/table.py b/python/lancedb/table.py
index 60aba685..c8b19ab4 100644
--- a/python/lancedb/table.py
+++ b/python/lancedb/table.py
@@ -180,6 +180,7 @@ class Table(ABC):
num_sub_vectors=96,
vector_column_name: str = VECTOR_COLUMN_NAME,
replace: bool = True,
+ accelerator: Optional[str] = None,
):
"""Create an index on the table.
@@ -200,6 +201,9 @@ class Table(ABC):
replace: bool, default True
If True, replace the existing index if it exists.
If False, raise an error if duplicate index exists.
+ accelerator: str, default None
+ If set, use the given accelerator to create the index.
+ Only "cuda" is supported for now.
"""
raise NotImplementedError
@@ -479,6 +483,7 @@ class LanceTable(Table):
num_sub_vectors=96,
vector_column_name=VECTOR_COLUMN_NAME,
replace: bool = True,
+ accelerator: Optional[str] = None,
):
"""Create an index on the table."""
self._dataset.create_index(
@@ -488,6 +493,7 @@ class LanceTable(Table):
num_partitions=num_partitions,
num_sub_vectors=num_sub_vectors,
replace=replace,
+ accelerator=accelerator,
)
self._reset_dataset()
diff --git a/python/pyproject.toml b/python/pyproject.toml
index 8f649248..cd9c5fb9 100644
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -2,7 +2,7 @@
name = "lancedb"
version = "0.3.0"
dependencies = [
- "pylance==0.8.1",
+ "pylance==0.8.3",
"ratelimiter~=1.0",
"retry>=0.9.2",
"tqdm>=4.1.0",
diff --git a/python/tests/test_table.py b/python/tests/test_table.py
index c1655601..103d26fb 100644
--- a/python/tests/test_table.py
+++ b/python/tests/test_table.py
@@ -223,6 +223,7 @@ def test_create_index_method():
num_partitions=256,
num_sub_vectors=96,
replace=True,
+ accelerator=None,
)