feat: use GPU for index creation. (#540)

Bump lance to 0.8.3 to include GPU training

---------

Co-authored-by: Rob Meng <rob.xu.meng@gmail.com>
This commit is contained in:
Lei Xu
2023-10-05 20:49:00 -07:00
committed by GitHub
parent 88d8d7249e
commit a26c8f3316
8 changed files with 104 additions and 85 deletions

View File

@@ -13,7 +13,7 @@
import uuid
from functools import cached_property
from typing import Union
from typing import Optional, Union
import pyarrow as pa
from lance import json_to_schema
@@ -62,6 +62,7 @@ class RemoteTable(Table):
num_sub_vectors=96,
vector_column_name: str = VECTOR_COLUMN_NAME,
replace: bool = True,
accelerator: Optional[str] = None,
):
raise NotImplementedError

View File

@@ -180,6 +180,7 @@ class Table(ABC):
num_sub_vectors=96,
vector_column_name: str = VECTOR_COLUMN_NAME,
replace: bool = True,
accelerator: Optional[str] = None,
):
"""Create an index on the table.
@@ -200,6 +201,9 @@ class Table(ABC):
replace: bool, default True
If True, replace the existing index if it exists.
If False, raise an error if duplicate index exists.
accelerator: str, default None
If set, use the given accelerator to create the index.
Only "cuda" is supported for now.
"""
raise NotImplementedError
@@ -479,6 +483,7 @@ class LanceTable(Table):
num_sub_vectors=96,
vector_column_name=VECTOR_COLUMN_NAME,
replace: bool = True,
accelerator: Optional[str] = None,
):
"""Create an index on the table."""
self._dataset.create_index(
@@ -488,6 +493,7 @@ class LanceTable(Table):
num_partitions=num_partitions,
num_sub_vectors=num_sub_vectors,
replace=replace,
accelerator=accelerator,
)
self._reset_dataset()

View File

@@ -2,7 +2,7 @@
name = "lancedb"
version = "0.3.0"
dependencies = [
"pylance==0.8.1",
"pylance==0.8.3",
"ratelimiter~=1.0",
"retry>=0.9.2",
"tqdm>=4.1.0",

View File

@@ -223,6 +223,7 @@ def test_create_index_method():
num_partitions=256,
num_sub_vectors=96,
replace=True,
accelerator=None,
)