mirror of
https://github.com/lancedb/lancedb.git
synced 2026-06-19 20:20:46 +00:00
## Summary

Surfaces the rich per-index metadata added in #3497 to the Python and Node.js language bindings. Closes #3495.

New optional fields exposed on `IndexConfig` in both bindings:

- `index_uuid` / `indexUuid` — UUID of the first index segment
- `type_url` / `typeUrl` — protobuf type URL for the index
- `created_at` / `createdAt` — creation timestamp (milliseconds since Unix epoch)
- `num_indexed_rows` / `numIndexedRows` — rows covered by the index
- `num_unindexed_rows` / `numUnindexedRows` — rows not yet indexed
- `size_bytes` / `sizeBytes` — total index file size in bytes
- `num_segments` / `numSegments` — number of index segments
- `index_version` / `indexVersion` — on-disk format version
- `index_details` / `indexDetails` — type-specific JSON details string

All fields are `None`/`undefined` for remote tables (which don't yet surface this metadata through the server response).

## Changes

- `python/src/index.rs`: extend `IndexConfig` pyclass; update `From` impl; update `__getitem__`
- `python/python/lancedb/_lancedb.pyi`: add type hints for new fields
- `python/python/tests/test_table.py`: new `test_index_config_fields` test
- `nodejs/src/table.rs`: extend `IndexConfig` napi struct; update `From` impl
- `nodejs/__test__/table.test.ts`: new test; update existing `toEqual` assertions to `expect.objectContaining` to accommodate new fields

## Test plan

- [x] Python: `uv run --extra tests pytest python/tests/test_table.py::test_index_config_fields`
- [x] Node.js: `pnpm test __test__/table.test.ts`

🤖 Generated with [Claude Code](https://claude.com/claude-code)

---------

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
532 lines
18 KiB
Python
532 lines
18 KiB
Python
from datetime import datetime, timedelta
|
|
from typing import Dict, List, Optional, Tuple, Any, TypedDict, Union, Literal
|
|
|
|
import pyarrow as pa
|
|
|
|
from .index import (
|
|
BTree,
|
|
IvfFlat,
|
|
IvfPq,
|
|
IvfSq,
|
|
Bitmap,
|
|
LabelList,
|
|
Fm,
|
|
HnswPq,
|
|
HnswSq,
|
|
HnswFlat,
|
|
FTS,
|
|
)
|
|
from lance_namespace import (
|
|
ListNamespacesResponse,
|
|
CreateNamespaceResponse,
|
|
DropNamespaceResponse,
|
|
DescribeNamespaceResponse,
|
|
ListTablesResponse,
|
|
)
|
|
from .remote import ClientConfig
|
|
|
|
# Ivf-prefixed aliases for the HNSW index configuration classes. Each alias
# is bound to the very same class object as the name on its right-hand side.
IvfHnswPq: type[HnswPq] = HnswPq
IvfHnswSq: type[HnswSq] = HnswSq
IvfHnswFlat: type[HnswFlat] = HnswFlat
|
|
|
|
class PyExpr:
    """A type-safe DataFusion expression node (Rust-side handle)."""

    # Binary comparisons against another expression.
    def eq(self, other: "PyExpr") -> "PyExpr": ...
    def ne(self, other: "PyExpr") -> "PyExpr": ...
    def lt(self, other: "PyExpr") -> "PyExpr": ...
    def lte(self, other: "PyExpr") -> "PyExpr": ...
    def gt(self, other: "PyExpr") -> "PyExpr": ...
    def gte(self, other: "PyExpr") -> "PyExpr": ...

    # Boolean combinators; trailing underscores avoid the Python keywords.
    def and_(self, other: "PyExpr") -> "PyExpr": ...
    def or_(self, other: "PyExpr") -> "PyExpr": ...
    def not_(self) -> "PyExpr": ...

    # Arithmetic against another expression.
    def add(self, other: "PyExpr") -> "PyExpr": ...
    def sub(self, other: "PyExpr") -> "PyExpr": ...
    def mul(self, other: "PyExpr") -> "PyExpr": ...
    def div(self, other: "PyExpr") -> "PyExpr": ...

    # String helpers.
    def lower(self) -> "PyExpr": ...
    def upper(self) -> "PyExpr": ...
    def contains(self, substr: "PyExpr") -> "PyExpr": ...

    # Membership test and type conversion.
    def isin(self, values: List["PyExpr"]) -> "PyExpr": ...
    def cast(self, data_type: pa.DataType) -> "PyExpr": ...

    # Renders the expression as a string.
    def to_sql(self) -> str: ...
|
|
|
|
def expr_col(name: str) -> PyExpr:
    """Stub for the native ``expr_col`` binding.

    Takes a column name and is declared to return a ``PyExpr``.
    """
    ...
|
|
def expr_lit(value: Union[bool, int, float, str, bytes]) -> PyExpr:
    """Stub for the native ``expr_lit`` binding.

    Takes a bool, int, float, str or bytes value and is declared to return
    a ``PyExpr``.
    """
    ...
|
|
def expr_func(name: str, args: List[PyExpr]) -> PyExpr:
    """Stub for the native ``expr_func`` binding.

    Takes a function name plus a list of ``PyExpr`` arguments and is declared
    to return a ``PyExpr``.
    """
    ...
|
|
|
|
class Session:
    """Type stub for the native ``Session`` class.

    The constructor accepts two optional cache-size arguments (both default
    to ``None``); ``default()`` is a static alternate constructor.
    """

    def __init__(
        self,
        index_cache_size_bytes: Optional[int] = None,
        metadata_cache_size_bytes: Optional[int] = None,
    ): ...
    @staticmethod
    def default() -> "Session": ...
    @property
    def size_bytes(self) -> int: ...
    @property
    def approx_num_items(self) -> int: ...
|
|
|
|
class Connection(object):
    """Type stub for the native database connection class.

    Every method is declared ``async``. Methods cover namespace management,
    table listing, and table creation/opening/cloning/renaming/dropping.
    """

    uri: str

    async def is_open(self): ...
    async def close(self): ...
    async def list_namespaces(
        self,
        namespace_path: Optional[List[str]] = None,
        page_token: Optional[str] = None,
        limit: Optional[int] = None,
    ) -> ListNamespacesResponse: ...
    async def create_namespace(
        self,
        namespace_path: List[str],
        mode: Optional[str] = None,
        properties: Optional[Dict[str, str]] = None,
    ) -> CreateNamespaceResponse: ...
    async def drop_namespace(
        self,
        namespace_path: List[str],
        mode: Optional[str] = None,
        behavior: Optional[str] = None,
    ) -> DropNamespaceResponse: ...
    async def describe_namespace(
        self,
        namespace_path: List[str],
    ) -> DescribeNamespaceResponse: ...
    async def list_tables(
        self,
        namespace_path: Optional[List[str]] = None,
        page_token: Optional[str] = None,
        limit: Optional[int] = None,
    ) -> ListTablesResponse: ...
    async def table_names(
        self,
        namespace_path: Optional[List[str]],
        start_after: Optional[str],
        limit: Optional[int],
    ) -> list[str]: ...  # Deprecated: Use list_tables instead
    async def create_table(
        self,
        name: str,
        mode: str,
        data: pa.RecordBatchReader,
        namespace_path: Optional[List[str]] = None,
        storage_options: Optional[Dict[str, str]] = None,
        location: Optional[str] = None,
    ) -> Table: ...
    async def create_empty_table(
        self,
        name: str,
        mode: str,
        schema: pa.Schema,
        namespace_path: Optional[List[str]] = None,
        storage_options: Optional[Dict[str, str]] = None,
        location: Optional[str] = None,
    ) -> Table: ...
    async def open_table(
        self,
        name: str,
        namespace_path: Optional[List[str]] = None,
        storage_options: Optional[Dict[str, str]] = None,
        index_cache_size: Optional[int] = None,
        location: Optional[str] = None,
    ) -> Table: ...
    async def clone_table(
        self,
        target_table_name: str,
        source_uri: str,
        target_namespace_path: Optional[List[str]] = None,
        source_version: Optional[int] = None,
        source_tag: Optional[str] = None,
        is_shallow: bool = True,
    ) -> Table: ...
    async def rename_table(
        self,
        cur_name: str,
        new_name: str,
        cur_namespace_path: Optional[List[str]] = None,
        new_namespace_path: Optional[List[str]] = None,
    ) -> None: ...
    async def drop_table(
        self, name: str, namespace_path: Optional[List[str]] = None
    ) -> None: ...
    async def drop_all_tables(
        self, namespace_path: Optional[List[str]] = None
    ) -> None: ...
    async def namespace_client_config(
        self,
    ) -> Dict[str, Any]: ...
|
|
|
|
class Table:
    """Type stub for the native table handle.

    Data and metadata operations are declared ``async``; ``name``,
    ``is_open``, ``close``, ``current_branch`` and the query/take builders
    are plain synchronous methods. ``tags`` and ``branches`` are properties.
    """

    def name(self) -> str: ...
    def __repr__(self) -> str: ...
    def is_open(self) -> bool: ...
    def close(self) -> None: ...
    async def schema(self) -> pa.Schema: ...
    async def add(
        self,
        data: pa.RecordBatchReader,
        mode: Literal["append", "overwrite"],
        progress: Optional[Any] = None,
    ) -> AddResult: ...
    async def update(
        self, updates: Dict[str, str], where: Optional[str]
    ) -> UpdateResult: ...
    async def count_rows(self, filter: Optional[str]) -> int: ...
    async def create_index(
        self,
        column: str,
        index: Union[
            IvfFlat,
            IvfSq,
            IvfPq,
            HnswPq,
            HnswSq,
            HnswFlat,
            BTree,
            Bitmap,
            LabelList,
            Fm,
            FTS,
        ],
        replace: Optional[bool],
        wait_timeout: Optional[object],
        *,
        name: Optional[str],
        train: Optional[bool],
    ): ...
    async def list_versions(self) -> List[Dict[str, Any]]: ...
    async def version(self) -> int: ...
    async def checkout(self, version: Union[int, str]): ...
    async def checkout_latest(self): ...
    async def restore(self, version: Optional[Union[int, str]] = None): ...
    async def prewarm_index(self, index_name: str) -> None: ...
    async def prewarm_data(self, columns: Optional[List[str]] = None) -> None: ...
    async def list_indices(self) -> list[IndexConfig]: ...
    async def delete(self, filter: Union[str, PyExpr]) -> DeleteResult: ...
    async def add_columns(self, columns: list[tuple[str, str]]) -> AddColumnsResult: ...
    async def add_columns_with_schema(self, schema: pa.Schema) -> AddColumnsResult: ...
    async def alter_columns(
        self, columns: list[dict[str, Any]]
    ) -> AlterColumnsResult: ...
    async def update_field_metadata(
        self, updates: list[dict[str, Any]]
    ) -> UpdateFieldMetadataResult: ...
    async def optimize(
        self,
        *,
        cleanup_since_ms: Optional[int] = None,
        delete_unverified: Optional[bool] = None,
    ) -> OptimizeStats: ...
    async def uri(self) -> str: ...
    async def initial_storage_options(self) -> Optional[Dict[str, str]]: ...
    async def latest_storage_options(self) -> Optional[Dict[str, str]]: ...
    async def set_unenforced_primary_key(self, columns: List[str]) -> None: ...
    async def set_lsm_write_spec(self, spec: LsmWriteSpec) -> None: ...
    async def unset_lsm_write_spec(self) -> None: ...
    async def close_lsm_writers(self) -> None: ...
    @property
    def tags(self) -> Tags: ...
    @property
    def branches(self) -> Branches: ...
    def current_branch(self) -> Optional[str]: ...
    def query(self) -> Query: ...
    def take_offsets(self, offsets: list[int]) -> TakeQuery: ...
    def take_row_ids(self, row_ids: list[int]) -> TakeQuery: ...
    def vector_search(self) -> VectorQuery: ...
|
|
|
|
class Tags:
    """Type stub for the tag manager exposed as ``Table.tags``.

    All operations are declared ``async``; ``list`` maps tag names to
    ``Tag`` dictionaries.
    """

    async def list(self) -> Dict[str, Tag]: ...
    async def get_version(self, tag: str) -> int: ...
    async def create(self, tag: str, version: int): ...
    async def delete(self, tag: str): ...
    async def update(self, tag: str, version: int): ...
|
|
|
|
class Branches:
    """Type stub for the branch manager exposed as ``Table.branches``.

    All operations are declared ``async``; ``create`` and ``checkout`` are
    declared to return a ``Table``.
    """

    async def list(self) -> Dict[str, Any]: ...
    async def create(
        self,
        name: str,
        from_ref: Optional[str] = None,
        from_version: Optional[int] = None,
    ) -> Table: ...
    async def checkout(self, name: str, version: Optional[int] = None) -> Table: ...
    async def delete(self, name: str) -> None: ...
|
|
|
|
class IndexConfig:
    """Type stub for the index description returned by ``Table.list_indices``.

    ``name``, ``index_type`` and ``columns`` are always present. The remaining
    fields are optional metadata; per the change summary they are ``None``
    for remote tables.
    """

    name: str
    index_type: str
    columns: List[str]
    index_uuid: Optional[str]
    type_url: Optional[str]
    # NOTE(review): the change summary describes this as milliseconds since
    # the Unix epoch; confirm the binding really yields a datetime.
    created_at: Optional[datetime]
    num_indexed_rows: Optional[int]
    num_unindexed_rows: Optional[int]
    size_bytes: Optional[int]
    num_segments: Optional[int]
    index_version: Optional[int]
    # NOTE(review): the change summary describes this as a JSON details
    # string; confirm whether ``Optional[str]`` would be more precise.
    index_details: Optional[Any]

    # The change summary states the native class implements ``__getitem__``;
    # declared here so subscript access type-checks. Key/return types are
    # presumed (field name in, field value out) - TODO confirm.
    def __getitem__(self, key: str) -> Any: ...
|
|
|
|
async def connect(
    uri: str,
    api_key: Optional[str],
    region: Optional[str],
    host_override: Optional[str],
    read_consistency_interval: Optional[float],
    client_config: Optional[Union[ClientConfig, Dict[str, Any]]],
    storage_options: Optional[Dict[str, str]],
    session: Optional[Session],
    manifest_enabled: bool = False,
    namespace_client_properties: Optional[Dict[str, str]] = None,
) -> Connection:
    """Stub for the native async ``connect`` entry point.

    The first eight parameters are required (each may be ``None`` except
    ``uri``); ``manifest_enabled`` defaults to ``False`` and
    ``namespace_client_properties`` to ``None``. Declared to return a
    ``Connection``.
    """
    ...
|
|
|
|
class RecordBatchStream:
    """Type stub for an async iterator of ``pa.RecordBatch`` objects.

    ``__aiter__`` is synchronous and ``__anext__`` is a coroutine, so
    instances can be consumed with ``async for``. ``schema`` is a property.
    """

    @property
    def schema(self) -> pa.Schema: ...
    def __aiter__(self) -> "RecordBatchStream": ...
    async def __anext__(self) -> pa.RecordBatch: ...
|
|
|
|
class ColumnOrdering(TypedDict):
    """One sort key, as accepted by the ``order_by`` methods of the queries."""

    column_name: str
    ascending: bool
    nulls_first: bool
|
|
|
|
class Query:
    """Type stub for the native plain-query builder (see ``Table.query``).

    Builder methods are synchronous and carry no declared return type;
    ``output_schema``, ``execute``, ``explain_plan`` and ``analyze_plan``
    are declared ``async``.
    """

    def where(self, filter: str): ...
    def where_expr(self, expr: PyExpr): ...
    def select(self, columns: List[Tuple[str, str]]): ...
    def select_expr(self, columns: List[Tuple[str, PyExpr]]): ...
    def select_columns(self, columns: List[str]): ...
    def limit(self, limit: int): ...
    def offset(self, offset: int): ...
    def fast_search(self): ...
    def with_row_id(self): ...
    def postfilter(self): ...
    def nearest_to(self, query_vec: pa.Array) -> VectorQuery: ...
    def nearest_to_text(self, query: dict) -> FTSQuery: ...
    def order_by(self, ordering: Optional[List[ColumnOrdering]]): ...
    async def output_schema(self) -> pa.Schema: ...
    async def execute(
        self, max_batch_length: Optional[int], timeout: Optional[timedelta]
    ) -> RecordBatchStream: ...
    async def explain_plan(self, verbose: Optional[bool]) -> str: ...
    async def analyze_plan(self) -> str: ...
    def to_query_request(self) -> PyQueryRequest: ...
|
|
|
|
class TakeQuery:
    """Type stub for the query returned by ``Table.take_offsets`` and
    ``Table.take_row_ids``."""

    def select(self, columns: List[str]): ...
    def with_row_id(self): ...
    async def output_schema(self) -> pa.Schema: ...
    async def execute(self) -> RecordBatchStream: ...
    def to_query_request(self) -> PyQueryRequest: ...
|
|
|
|
class FTSQuery:
    """Type stub for the full-text-search query builder (see
    ``Query.nearest_to_text``).

    ``nearest_to`` is declared to return a ``HybridQuery``.
    """

    def where(self, filter: str): ...
    def where_expr(self, expr: PyExpr): ...
    def select(self, columns: List[Tuple[str, str]]): ...
    def select_expr(self, columns: List[Tuple[str, PyExpr]]): ...
    def limit(self, limit: int): ...
    def offset(self, offset: int): ...
    def fast_search(self): ...
    def with_row_id(self): ...
    def postfilter(self): ...
    def get_query(self) -> str: ...
    def add_query_vector(self, query_vec: pa.Array) -> None: ...
    def nearest_to(self, query_vec: pa.Array) -> HybridQuery: ...
    def order_by(self, ordering: Optional[List[ColumnOrdering]]): ...
    async def output_schema(self) -> pa.Schema: ...
    async def execute(
        self, max_batch_length: Optional[int], timeout: Optional[timedelta]
    ) -> RecordBatchStream: ...
    def to_query_request(self) -> PyQueryRequest: ...
|
|
|
|
class VectorQuery:
    """Type stub for the vector-search query builder (see
    ``Table.vector_search`` and ``Query.nearest_to``).

    ``nearest_to_text`` is declared to return a ``HybridQuery``.
    """

    async def output_schema(self) -> pa.Schema: ...
    async def execute(self) -> RecordBatchStream: ...
    def where(self, filter: str): ...
    def where_expr(self, expr: PyExpr): ...
    def select(self, columns: List[Tuple[str, str]]): ...
    def select_expr(self, columns: List[Tuple[str, PyExpr]]): ...
    def select_with_projection(self, columns: Tuple[str, str]): ...
    def limit(self, limit: int): ...
    def offset(self, offset: int): ...
    def column(self, column: str): ...
    def distance_type(self, distance_type: str): ...
    def postfilter(self): ...
    def refine_factor(self, refine_factor: int): ...
    def nprobes(self, nprobes: int): ...
    def minimum_nprobes(self, minimum_nprobes: int): ...
    def maximum_nprobes(self, maximum_nprobes: int): ...
    def bypass_vector_index(self): ...
    def nearest_to_text(self, query: dict) -> HybridQuery: ...
    def order_by(self, ordering: Optional[List[ColumnOrdering]]): ...
    def to_query_request(self) -> PyQueryRequest: ...
|
|
|
|
class HybridQuery:
    """Type stub for the combined vector + full-text query builder.

    ``to_vector_query`` and ``to_fts_query`` are declared to return the two
    component queries. All methods are synchronous.
    """

    def where(self, filter: str): ...
    def where_expr(self, expr: PyExpr): ...
    def select(self, columns: List[Tuple[str, str]]): ...
    def select_expr(self, columns: List[Tuple[str, PyExpr]]): ...
    def limit(self, limit: int): ...
    def offset(self, offset: int): ...
    def fast_search(self): ...
    def with_row_id(self): ...
    def postfilter(self): ...
    def distance_type(self, distance_type: str): ...
    def refine_factor(self, refine_factor: int): ...
    def nprobes(self, nprobes: int): ...
    def minimum_nprobes(self, minimum_nprobes: int): ...
    def maximum_nprobes(self, maximum_nprobes: int): ...
    def bypass_vector_index(self): ...
    def order_by(self, ordering: Optional[List[ColumnOrdering]]): ...
    def to_vector_query(self) -> VectorQuery: ...
    def to_fts_query(self) -> FTSQuery: ...
    def get_limit(self) -> int: ...
    def get_with_row_id(self) -> bool: ...
    def to_query_request(self) -> PyQueryRequest: ...
|
|
|
|
class FullTextQuery:
    """Opaque placeholder type; no members are declared in this stub.

    Referenced by ``PyQueryRequest.full_text_search``.
    """

    pass
|
|
|
|
class PyQueryRequest:
    """Type stub for the request object returned by the ``to_query_request``
    methods of the query builders.

    Every field is optional.
    """

    limit: Optional[int]
    offset: Optional[int]
    filter: Optional[Union[str, bytes]]
    full_text_search: Optional[FullTextQuery]
    select: Optional[Union[str, List[str]]]
    fast_search: Optional[bool]
    with_row_id: Optional[bool]
    column: Optional[str]
    query_vector: Optional[List[pa.Array]]
    minimum_nprobes: Optional[int]
    maximum_nprobes: Optional[int]
    lower_bound: Optional[float]
    upper_bound: Optional[float]
    ef: Optional[int]
    refine_factor: Optional[int]
    distance_type: Optional[str]
    bypass_vector_index: Optional[bool]
    postfilter: Optional[bool]
    norm: Optional[str]
    order_by: Optional[List[ColumnOrdering]]
|
|
|
|
class CompactionStats:
    """Fragment and file counters exposed as ``OptimizeStats.compaction``."""

    fragments_removed: int
    fragments_added: int
    files_removed: int
    files_added: int
|
|
|
|
class CleanupStats:
    """Type stub for a native stats record with two integer counters."""

    bytes_removed: int
    old_versions: int
|
|
|
|
class RemovalStats:
    """Removal counters exposed as ``OptimizeStats.prune``."""

    bytes_removed: int
    old_versions_removed: int
|
|
|
|
class OptimizeStats:
    """Declared return type of ``Table.optimize``: compaction and prune stats."""

    compaction: CompactionStats
    prune: RemovalStats
|
|
|
|
class Tag(TypedDict):
    """Value type of the mapping returned by ``Tags.list``."""

    version: int
    manifest_size: int
|
|
|
|
class AddResult:
    """Declared return type of ``Table.add``."""

    version: int
|
|
|
|
class DeleteResult:
    """Declared return type of ``Table.delete``."""

    version: int
|
|
|
|
class UpdateResult:
    """Declared return type of ``Table.update``."""

    rows_updated: int
    version: int
|
|
|
|
class MergeResult:
    """Type stub for a native merge result: a version plus row/attempt counters."""

    version: int
    num_updated_rows: int
    num_inserted_rows: int
    num_deleted_rows: int
    num_attempts: int
    num_rows: int
|
|
|
|
class LsmWriteSpec:
    """Specification selecting Lance's MemWAL LSM-style write path for
    `merge_insert`."""

    # Static constructors, one per spec type (see ``spec_type``).
    @staticmethod
    def bucket(column: str, num_buckets: int) -> "LsmWriteSpec": ...
    @staticmethod
    def identity(column: str) -> "LsmWriteSpec": ...
    @staticmethod
    def unsharded() -> "LsmWriteSpec": ...
    def with_maintained_indexes(self, indexes: List[str]) -> "LsmWriteSpec":
        """Return a copy of this spec asking the MemWAL to keep the named
        indexes up to date as rows are appended."""
        ...
    def with_writer_config_defaults(self, defaults: Dict[str, str]) -> "LsmWriteSpec":
        """Return a copy of this spec recording the given default
        `ShardWriter` configuration in the MemWAL index."""
        ...
    @property
    def spec_type(self) -> str:
        """One of 'bucket', 'identity', or 'unsharded'."""
        ...
    @property
    def column(self) -> Optional[str]: ...
    @property
    def num_buckets(self) -> Optional[int]: ...
    @property
    def maintained_indexes(self) -> List[str]: ...
    @property
    def writer_config_defaults(self) -> Dict[str, str]: ...
|
|
|
|
class AddColumnsResult:
    """Declared return type of ``Table.add_columns`` and
    ``Table.add_columns_with_schema``."""

    version: int
|
|
|
|
class AlterColumnsResult:
    """Declared return type of ``Table.alter_columns``."""

    version: int
|
|
|
|
class UpdateFieldMetadataResult:
    """Declared return type of ``Table.update_field_metadata``."""

    version: int
|
|
|
|
class DropColumnsResult:
    """Type stub for a native result record carrying a single ``version``."""

    version: int
|
|
|
|
class AsyncPermutationBuilder:
    """Type stub for the native permutation builder.

    Every configuration method is declared to return an
    ``AsyncPermutationBuilder``; only ``execute`` is ``async`` and it is
    declared to return a ``Table``.
    """

    def select(self, projections: Dict[str, str]) -> "AsyncPermutationBuilder": ...
    def split_random(
        self,
        *,
        ratios: Optional[List[float]] = None,
        counts: Optional[List[int]] = None,
        fixed: Optional[int] = None,
        seed: Optional[int] = None,
    ) -> "AsyncPermutationBuilder": ...
    def split_hash(
        self, columns: List[str], split_weights: List[int], *, discard_weight: int = 0
    ) -> "AsyncPermutationBuilder": ...
    def split_sequential(
        self,
        *,
        ratios: Optional[List[float]] = None,
        counts: Optional[List[int]] = None,
        fixed: Optional[int] = None,
    ) -> "AsyncPermutationBuilder": ...
    def split_calculated(self, calculation: str) -> "AsyncPermutationBuilder": ...
    def shuffle(
        self, seed: Optional[int], clump_size: Optional[int]
    ) -> "AsyncPermutationBuilder": ...
    def filter(self, filter: str) -> "AsyncPermutationBuilder": ...
    async def execute(self) -> Table: ...
|
|
|
|
def async_permutation_builder(
    table: Table,
) -> AsyncPermutationBuilder:
    """Stub for the native factory: takes a ``Table`` and is declared to
    return an ``AsyncPermutationBuilder``."""
    ...
|
|
def fts_query_to_json(query: Any) -> str:
    """Stub for the native ``fts_query_to_json`` binding.

    Accepts any query object and is declared to return a string.
    """
    ...
|
|
|
|
class PermutationReader:
    """Type stub for the native permutation reader.

    Constructed from two ``Table`` handles: the base table and the
    permutation table.
    """

    def __init__(self, base_table: Table, permutation_table: Table): ...
|