doc: add doc to use GPU for indexing (#611)

This commit is contained in:
Lei Xu
2023-10-30 15:25:00 -07:00
committed by GitHub
parent f37fe120fd
commit 6fb539b5bf
2 changed files with 54 additions and 6 deletions

View File

@@ -71,9 +71,41 @@ a single PQ code.
### Use GPU to build vector index
Lance Python SDK has experimental GPU support for creating IVF index.
Using GPU for index creation requires [PyTorch>2.0](https://pytorch.org/) to be installed.
You can specify the GPU device used to train IVF partitions via:
- **accelerator**: Specify to ``"cuda"`` to enable GPU training.
- **accelerator**: Set to ``cuda`` or ``mps`` (on Apple Silicon) to enable GPU training.
=== "Linux"
<!-- skip-test -->
``` { .python .copy }
# Create index using CUDA on Nvidia GPUs.
tbl.create_index(
num_partitions=256,
num_sub_vectors=96,
accelerator="cuda"
)
```
=== "macOS"
<!-- skip-test -->
```python
# Create index using MPS on Apple Silicon.
tbl.create_index(
num_partitions=256,
num_sub_vectors=96,
accelerator="mps"
)
```
Troubleshooting:
If you see ``AssertionError: Torch not compiled with CUDA enabled``, you need to [install
PyTorch with CUDA support](https://pytorch.org/get-started/locally/).
## Querying an ANN Index

View File

@@ -18,29 +18,45 @@ python_file = ".py"
# Folder that receives the generated Python files.
python_folder = "python"

# Candidate input files: everything matched by `glob_string` (defined earlier
# in this file), searched recursively.
files = glob.glob(glob_string, recursive=True)

# Expand every exclusion pattern exactly once and flatten the matches into a
# single list; files listed here are skipped by the extraction loop below.
excluded_files = [
    f
    for excluded_glob in excluded_globs
    for f in glob.glob(excluded_glob, recursive=True)
]
def yield_lines(lines: Iterator[str], prefix: str, suffix: str) -> Iterator[str]:
    """Yield the body lines of the Python code blocks found in ``lines``.

    A code block opens on a line whose stripped text starts with
    ``prefix + python_prefix`` (``python_prefix`` is a module-level name) and
    closes on the next line whose stripped text starts with ``suffix``.  The
    fence lines themselves are never yielded.

    Args:
        lines: Iterator over the lines of the document being scanned.
        prefix: Opening fence marker.
        suffix: Closing fence marker.

    Yields:
        Each line inside a detected block, with the opening fence's leading
        indentation removed, followed by a single ``"\n"`` when the block
        closes.  Nothing is yielded for a block that is marked to be skipped.

    A line containing ``"skip-test"`` arms a skip flag; the flag stays set
    until the closing fence of the next detected block, so that block's
    content (and its trailing separator) is suppressed.
    """
    in_code_block = False
    # Python code has strict indentation, so remember how far the opening
    # fence was indented and remove that much from every line of the block.
    strip_length = 0
    skip_test = False

    for line in lines:
        if "skip-test" in line:
            skip_test = True

        stripped = line.strip()
        if stripped.startswith(prefix + python_prefix):
            in_code_block = True
            strip_length = len(line) - len(line.lstrip())
        elif in_code_block and stripped.startswith(suffix):
            in_code_block = False
            if not skip_test:
                # Blank separator between consecutive extracted blocks.
                yield "\n"
            # The skip marker applies to a single block only.
            skip_test = False
        elif in_code_block:
            if not skip_test:
                yield line[strip_length:]
# Extract the Python snippets of every non-excluded input file and write them
# to <python_folder>/<name>/<name><python_file>.
for file in files:
    if file in excluded_files:
        continue

    with open(file, "r") as f:
        lines = list(yield_lines(iter(f), "```", "```"))

    # Files without any extracted line produce no output file.
    if not lines:
        continue

    # Remove only a trailing ".md".  str.strip(".md") would instead strip any
    # of the characters ".", "m" and "d" from BOTH ends of the name, turning
    # e.g. "models.md" into "odels".
    file_name = Path(file).name
    md_suffix = ".md"
    if file_name.endswith(md_suffix):
        base_name = file_name[: -len(md_suffix)]
    else:
        base_name = file_name

    out_path = Path(python_folder) / base_name / (base_name + python_file)
    print(out_path)
    out_path.parent.mkdir(exist_ok=True, parents=True)
    with open(out_path, "w") as out:
        out.writelines(lines)