doc: add doc to use GPU for indexing (#611)

This commit is contained in:
Lei Xu
2023-10-30 15:25:00 -07:00
committed by Weston Pace
parent 87364532bf
commit b5e57ebce3
2 changed files with 54 additions and 6 deletions

View File

@@ -71,9 +71,41 @@ a single PQ code.
### Use GPU to build vector index ### Use GPU to build vector index
Lance Python SDK has experimental GPU support for creating IVF index. Lance Python SDK has experimental GPU support for creating IVF index.
Using GPU for index creation requires [PyTorch>2.0](https://pytorch.org/) to be installed.
You can specify the GPU device to train IVF partitions via You can specify the GPU device to train IVF partitions via
- **accelerator**: Specify to `"cuda"`` to enable GPU training. - **accelerator**: Specify to ``cuda`` or ``mps`` (on Apple Silicon) to enable GPU training.
=== "Linux"
<!-- skip-test -->
``` { .python .copy }
# Create index using CUDA on Nvidia GPUs.
tbl.create_index(
num_partitions=256,
num_sub_vectors=96,
accelerator="cuda"
)
```
=== "macOS"
<!-- skip-test -->
```python
# Create index using MPS on Apple Silicon.
tbl.create_index(
num_partitions=256,
num_sub_vectors=96,
accelerator="mps"
)
```
Troubleshooting:
If you see ``AssertionError: Torch not compiled with CUDA enabled``, you need to [install
PyTorch with CUDA support](https://pytorch.org/get-started/locally/).
## Querying an ANN Index ## Querying an ANN Index

View File

@@ -18,29 +18,45 @@ python_file = ".py"
python_folder = "python" python_folder = "python"
files = glob.glob(glob_string, recursive=True) files = glob.glob(glob_string, recursive=True)
excluded_files = [f for excluded_glob in excluded_globs for f in glob.glob(excluded_glob, recursive=True)] excluded_files = [
f
for excluded_glob in excluded_globs
for f in glob.glob(excluded_glob, recursive=True)
]
def yield_lines(lines: Iterator[str], prefix: str, suffix: str): def yield_lines(lines: Iterator[str], prefix: str, suffix: str):
in_code_block = False in_code_block = False
# Python code has strict indentation # Python code has strict indentation
strip_length = 0 strip_length = 0
skip_test = False
for line in lines: for line in lines:
if "skip-test" in line:
skip_test = True
if line.strip().startswith(prefix + python_prefix): if line.strip().startswith(prefix + python_prefix):
in_code_block = True in_code_block = True
strip_length = len(line) - len(line.lstrip()) strip_length = len(line) - len(line.lstrip())
elif in_code_block and line.strip().startswith(suffix): elif in_code_block and line.strip().startswith(suffix):
in_code_block = False in_code_block = False
yield "\n" if not skip_test:
yield "\n"
skip_test = False
elif in_code_block: elif in_code_block:
yield line[strip_length:] if not skip_test:
yield line[strip_length:]
for file in filter(lambda file: file not in excluded_files, files): for file in filter(lambda file: file not in excluded_files, files):
with open(file, "r") as f: with open(file, "r") as f:
lines = list(yield_lines(iter(f), "```", "```")) lines = list(yield_lines(iter(f), "```", "```"))
if len(lines) > 0: if len(lines) > 0:
out_path = Path(python_folder) / Path(file).name.strip(".md") / (Path(file).name.strip(".md") + python_file) print(lines)
out_path = (
Path(python_folder)
/ Path(file).name.strip(".md")
/ (Path(file).name.strip(".md") + python_file)
)
print(out_path) print(out_path)
out_path.parent.mkdir(exist_ok=True, parents=True) out_path.parent.mkdir(exist_ok=True, parents=True)
with open(out_path, "w") as out: with open(out_path, "w") as out:
out.writelines(lines) out.writelines(lines)