mirror of
https://github.com/lancedb/lancedb.git
synced 2025-12-26 14:49:57 +00:00
feat: use GPU for index creation. (#540)
Bump lance to 0.8.3 to include GPU training --------- Co-authored-by: Rob Meng <rob.xu.meng@gmail.com>
This commit is contained in:
@@ -5,9 +5,9 @@ exclude = ["python"]
|
||||
resolver = "2"
|
||||
|
||||
[workspace.dependencies]
|
||||
lance = { "version" = "=0.8.1", "features" = ["dynamodb"] }
|
||||
lance-linalg = { "version" = "=0.8.1" }
|
||||
lance-testing = { "version" = "=0.8.1" }
|
||||
lance = { "version" = "=0.8.3", "features" = ["dynamodb"] }
|
||||
lance-linalg = { "version" = "=0.8.3" }
|
||||
lance-testing = { "version" = "=0.8.3" }
|
||||
# Note that this one does not include pyarrow
|
||||
arrow = { version = "43.0.0", optional = false }
|
||||
arrow-array = "43.0"
|
||||
|
||||
@@ -33,6 +33,8 @@ The key features of LanceDB include:
|
||||
|
||||
* Zero-copy, automatic versioning, manage versions of your data without needing extra infrastructure.
|
||||
|
||||
* GPU support in building vector index(*).
|
||||
|
||||
* Ecosystem integrations with [LangChain 🦜️🔗](https://python.langchain.com/en/latest/modules/indexes/vectorstores/examples/lanecdb.html), [LlamaIndex 🦙](https://gpt-index.readthedocs.io/en/latest/examples/vector_stores/LanceDBIndexDemo.html), Apache-Arrow, Pandas, Polars, DuckDB and more on the way.
|
||||
|
||||
LanceDB's core is written in Rust 🦀 and is built using <a href="https://github.com/lancedb/lance">Lance</a>, an open-source columnar format designed for performant ML workloads.
|
||||
|
||||
@@ -68,6 +68,12 @@ a single PQ code.
|
||||
<figcaption>IVF_PQ index with <code>num_partitions=2, num_sub_vectors=4</code></figcaption>
|
||||
</figure>
|
||||
|
||||
### Use GPU to build vector index
|
||||
|
||||
The Lance Python SDK has experimental GPU support for creating IVF indexes.
|
||||
You can specify the GPU device used to train IVF partitions via the following parameter:
|
||||
|
||||
- **accelerator**: Set to `"cuda"` to enable GPU training.
|
||||
|
||||
## Querying an ANN Index
|
||||
|
||||
|
||||
@@ -1,5 +1,8 @@
|
||||
lancedb @ git+https://github.com/lancedb/lancedb.git#egg=subdir&subdirectory=python
|
||||
-e ../../python
|
||||
numpy
|
||||
pandas
|
||||
pylance
|
||||
duckdb
|
||||
--extra-index-url https://download.pytorch.org/whl/cpu
|
||||
torch
|
||||
|
||||
|
||||
@@ -13,7 +13,7 @@
|
||||
|
||||
import uuid
|
||||
from functools import cached_property
|
||||
from typing import Union
|
||||
from typing import Optional, Union
|
||||
|
||||
import pyarrow as pa
|
||||
from lance import json_to_schema
|
||||
@@ -62,6 +62,7 @@ class RemoteTable(Table):
|
||||
num_sub_vectors=96,
|
||||
vector_column_name: str = VECTOR_COLUMN_NAME,
|
||||
replace: bool = True,
|
||||
accelerator: Optional[str] = None,
|
||||
):
|
||||
raise NotImplementedError
|
||||
|
||||
|
||||
@@ -180,6 +180,7 @@ class Table(ABC):
|
||||
num_sub_vectors=96,
|
||||
vector_column_name: str = VECTOR_COLUMN_NAME,
|
||||
replace: bool = True,
|
||||
accelerator: Optional[str] = None,
|
||||
):
|
||||
"""Create an index on the table.
|
||||
|
||||
@@ -200,6 +201,9 @@ class Table(ABC):
|
||||
replace: bool, default True
|
||||
If True, replace the existing index if it exists.
|
||||
If False, raise an error if duplicate index exists.
|
||||
accelerator: str, default None
|
||||
If set, use the given accelerator to create the index.
|
||||
Only "cuda" is supported for now.
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
@@ -479,6 +483,7 @@ class LanceTable(Table):
|
||||
num_sub_vectors=96,
|
||||
vector_column_name=VECTOR_COLUMN_NAME,
|
||||
replace: bool = True,
|
||||
accelerator: Optional[str] = None,
|
||||
):
|
||||
"""Create an index on the table."""
|
||||
self._dataset.create_index(
|
||||
@@ -488,6 +493,7 @@ class LanceTable(Table):
|
||||
num_partitions=num_partitions,
|
||||
num_sub_vectors=num_sub_vectors,
|
||||
replace=replace,
|
||||
accelerator=accelerator,
|
||||
)
|
||||
self._reset_dataset()
|
||||
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
name = "lancedb"
|
||||
version = "0.3.0"
|
||||
dependencies = [
|
||||
"pylance==0.8.1",
|
||||
"pylance==0.8.3",
|
||||
"ratelimiter~=1.0",
|
||||
"retry>=0.9.2",
|
||||
"tqdm>=4.1.0",
|
||||
|
||||
@@ -223,6 +223,7 @@ def test_create_index_method():
|
||||
num_partitions=256,
|
||||
num_sub_vectors=96,
|
||||
replace=True,
|
||||
accelerator=None,
|
||||
)
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user