lancedb/python/python/lancedb/_lancedb.pyi

from datetime import datetime, timedelta
from typing import Dict, List, Optional, Tuple, Any, TypedDict, Union, Literal

import pyarrow as pa

from .index import (
    BTree,
    IvfFlat,
    IvfPq,
    IvfSq,
    Bitmap,
    LabelList,
    Fm,
    HnswPq,
    HnswSq,
    HnswFlat,
    FTS,
)
from lance_namespace import (
    ListNamespacesResponse,
    CreateNamespaceResponse,
    DropNamespaceResponse,
    DescribeNamespaceResponse,
    ListTablesResponse,
)
from .remote import ClientConfig

IvfHnswPq: type[HnswPq] = HnswPq
IvfHnswSq: type[HnswSq] = HnswSq
IvfHnswFlat: type[HnswFlat] = HnswFlat

class PyExpr:
    """A type-safe DataFusion expression node (Rust-side handle)."""

    def eq(self, other: "PyExpr") -> "PyExpr": ...
    def ne(self, other: "PyExpr") -> "PyExpr": ...
    def lt(self, other: "PyExpr") -> "PyExpr": ...
    def lte(self, other: "PyExpr") -> "PyExpr": ...
    def gt(self, other: "PyExpr") -> "PyExpr": ...
    def gte(self, other: "PyExpr") -> "PyExpr": ...
    def and_(self, other: "PyExpr") -> "PyExpr": ...
    def or_(self, other: "PyExpr") -> "PyExpr": ...
    def not_(self) -> "PyExpr": ...
    def add(self, other: "PyExpr") -> "PyExpr": ...
    def sub(self, other: "PyExpr") -> "PyExpr": ...
    def mul(self, other: "PyExpr") -> "PyExpr": ...
    def div(self, other: "PyExpr") -> "PyExpr": ...
    def lower(self) -> "PyExpr": ...
    def upper(self) -> "PyExpr": ...
    def contains(self, substr: "PyExpr") -> "PyExpr": ...
    def isin(self, values: List["PyExpr"]) -> "PyExpr": ...
    def cast(self, data_type: pa.DataType) -> "PyExpr": ...
    def to_sql(self) -> str: ...

def expr_col(name: str) -> PyExpr: ...
def expr_lit(value: Union[bool, int, float, str, bytes]) -> PyExpr: ...
def expr_func(name: str, args: List[PyExpr]) -> PyExpr: ...

class Session:
    def __init__(
        self,
        index_cache_size_bytes: Optional[int] = None,
        metadata_cache_size_bytes: Optional[int] = None,
    ): ...
    @staticmethod
    def default() -> "Session": ...
    @property
    def size_bytes(self) -> int: ...
    @property
    def approx_num_items(self) -> int: ...

class Connection(object):
    uri: str
    async def is_open(self): ...
    async def close(self): ...
    async def list_namespaces(
        self,
        namespace_path: Optional[List[str]] = None,
        page_token: Optional[str] = None,
        limit: Optional[int] = None,
    ) -> ListNamespacesResponse: ...
    async def create_namespace(
        self,
        namespace_path: List[str],
        mode: Optional[str] = None,
        properties: Optional[Dict[str, str]] = None,
    ) -> CreateNamespaceResponse: ...
    async def drop_namespace(
        self,
        namespace_path: List[str],
        mode: Optional[str] = None,
        behavior: Optional[str] = None,
    ) -> DropNamespaceResponse: ...
    async def describe_namespace(
        self,
        namespace_path: List[str],
    ) -> DescribeNamespaceResponse: ...
    async def list_tables(
        self,
        namespace_path: Optional[List[str]] = None,
        page_token: Optional[str] = None,
        limit: Optional[int] = None,
    ) -> ListTablesResponse: ...
    async def table_names(
        self,
        namespace_path: Optional[List[str]],
        start_after: Optional[str],
        limit: Optional[int],
    ) -> list[str]: ...  # Deprecated: Use list_tables instead
    async def create_table(
        self,
        name: str,
        mode: str,
        data: pa.RecordBatchReader,
        namespace_path: Optional[List[str]] = None,
        storage_options: Optional[Dict[str, str]] = None,
        location: Optional[str] = None,
    ) -> Table: ...
    async def create_empty_table(
        self,
        name: str,
        mode: str,
        schema: pa.Schema,
        namespace_path: Optional[List[str]] = None,
        storage_options: Optional[Dict[str, str]] = None,
        location: Optional[str] = None,
    ) -> Table: ...
    async def open_table(
        self,
        name: str,
        namespace_path: Optional[List[str]] = None,
        storage_options: Optional[Dict[str, str]] = None,
        index_cache_size: Optional[int] = None,
        location: Optional[str] = None,
    ) -> Table: ...
    async def clone_table(
        self,
        target_table_name: str,
        source_uri: str,
        target_namespace_path: Optional[List[str]] = None,
        source_version: Optional[int] = None,
        source_tag: Optional[str] = None,
        is_shallow: bool = True,
    ) -> Table: ...
    async def rename_table(
        self,
        cur_name: str,
        new_name: str,
        cur_namespace_path: Optional[List[str]] = None,
        new_namespace_path: Optional[List[str]] = None,
    ) -> None: ...
    async def drop_table(
        self, name: str, namespace_path: Optional[List[str]] = None
    ) -> None: ...
    async def drop_all_tables(
        self, namespace_path: Optional[List[str]] = None
    ) -> None: ...
    async def namespace_client_config(
        self,
    ) -> Dict[str, Any]: ...

class Table:
    def name(self) -> str: ...
    def __repr__(self) -> str: ...
    def is_open(self) -> bool: ...
    def close(self) -> None: ...
    async def schema(self) -> pa.Schema: ...
    async def add(
        self,
        data: pa.RecordBatchReader,
        mode: Literal["append", "overwrite"],
        progress: Optional[Any] = None,
    ) -> AddResult: ...
    async def update(
        self, updates: Dict[str, str], where: Optional[str]
    ) -> UpdateResult: ...
    async def count_rows(self, filter: Optional[str]) -> int: ...
    async def create_index(
        self,
        column: str,
        index: Union[
            IvfFlat,
            IvfSq,
            IvfPq,
            HnswPq,
            HnswSq,
            HnswFlat,
            BTree,
            Bitmap,
            LabelList,
            Fm,
            FTS,
        ],
        replace: Optional[bool],
        wait_timeout: Optional[object],
        *,
        name: Optional[str],
        train: Optional[bool],
    ): ...
    async def list_versions(self) -> List[Dict[str, Any]]: ...
    async def version(self) -> int: ...
    async def checkout(self, version: Union[int, str]): ...
    async def checkout_latest(self): ...
    async def restore(self, version: Optional[Union[int, str]] = None): ...
    async def prewarm_index(self, index_name: str) -> None: ...
    async def prewarm_data(self, columns: Optional[List[str]] = None) -> None: ...
    async def list_indices(self) -> list[IndexConfig]: ...
    async def delete(self, filter: Union[str, PyExpr]) -> DeleteResult: ...
    async def add_columns(self, columns: list[tuple[str, str]]) -> AddColumnsResult: ...
    async def add_columns_with_schema(self, schema: pa.Schema) -> AddColumnsResult: ...
    async def alter_columns(
        self, columns: list[dict[str, Any]]
    ) -> AlterColumnsResult: ...
    async def update_field_metadata(
        self, updates: list[dict[str, Any]]
    ) -> UpdateFieldMetadataResult: ...
    async def optimize(
        self,
        *,
        cleanup_since_ms: Optional[int] = None,
        delete_unverified: Optional[bool] = None,
    ) -> OptimizeStats: ...
    async def uri(self) -> str: ...
    async def initial_storage_options(self) -> Optional[Dict[str, str]]: ...
    async def latest_storage_options(self) -> Optional[Dict[str, str]]: ...
    async def set_unenforced_primary_key(self, columns: List[str]) -> None: ...
    async def set_lsm_write_spec(self, spec: LsmWriteSpec) -> None: ...
    async def unset_lsm_write_spec(self) -> None: ...
    async def close_lsm_writers(self) -> None: ...
    @property
    def tags(self) -> Tags: ...
    @property
    def branches(self) -> Branches: ...
    def current_branch(self) -> Optional[str]: ...
    def query(self) -> Query: ...
    def take_offsets(self, offsets: list[int]) -> TakeQuery: ...
    def take_row_ids(self, row_ids: list[int]) -> TakeQuery: ...
    def vector_search(self) -> VectorQuery: ...

class Tags:
    async def list(self) -> Dict[str, Tag]: ...
    async def get_version(self, tag: str) -> int: ...
    async def create(self, tag: str, version: int): ...
    async def delete(self, tag: str): ...
    async def update(self, tag: str, version: int): ...

class Branches:
    async def list(self) -> Dict[str, Any]: ...
    async def create(
        self,
        name: str,
        from_ref: Optional[str] = None,
        from_version: Optional[int] = None,
    ) -> Table: ...
    async def checkout(self, name: str, version: Optional[int] = None) -> Table: ...
    async def delete(self, name: str) -> None: ...

class IndexConfig:
    name: str
    index_type: str
    columns: List[str]
    index_uuid: Optional[str]
    type_url: Optional[str]
    created_at: Optional[datetime]
    num_indexed_rows: Optional[int]
    num_unindexed_rows: Optional[int]
    size_bytes: Optional[int]
    num_segments: Optional[int]
    index_version: Optional[int]
    index_details: Optional[Any]

async def connect(
    uri: str,
    api_key: Optional[str],
    region: Optional[str],
    host_override: Optional[str],
    read_consistency_interval: Optional[float],
    client_config: Optional[Union[ClientConfig, Dict[str, Any]]],
    storage_options: Optional[Dict[str, str]],
    session: Optional[Session],
    manifest_enabled: bool = False,
    namespace_client_properties: Optional[Dict[str, str]] = None,
) -> Connection: ...

class RecordBatchStream:
    @property
    def schema(self) -> pa.Schema: ...
    def __aiter__(self) -> "RecordBatchStream": ...
    async def __anext__(self) -> pa.RecordBatch: ...

class ColumnOrdering(TypedDict):
    column_name: str
    ascending: bool
    nulls_first: bool

class Query:
    def where(self, filter: str): ...
    def where_expr(self, expr: PyExpr): ...
    def select(self, columns: List[Tuple[str, str]]): ...
    def select_expr(self, columns: List[Tuple[str, PyExpr]]): ...
    def select_columns(self, columns: List[str]): ...
    def limit(self, limit: int): ...
    def offset(self, offset: int): ...
    def fast_search(self): ...
    def with_row_id(self): ...
    def postfilter(self): ...
    def nearest_to(self, query_vec: pa.Array) -> VectorQuery: ...
    def nearest_to_text(self, query: dict) -> FTSQuery: ...
    def order_by(self, ordering: Optional[List[ColumnOrdering]]): ...
    async def output_schema(self) -> pa.Schema: ...
    async def execute(
        self, max_batch_length: Optional[int], timeout: Optional[timedelta]
    ) -> RecordBatchStream: ...
    async def explain_plan(self, verbose: Optional[bool]) -> str: ...
    async def analyze_plan(self) -> str: ...
    def to_query_request(self) -> PyQueryRequest: ...

class TakeQuery:
    def select(self, columns: List[str]): ...
    def with_row_id(self): ...
    async def output_schema(self) -> pa.Schema: ...
    async def execute(self) -> RecordBatchStream: ...
    def to_query_request(self) -> PyQueryRequest: ...

class FTSQuery:
    def where(self, filter: str): ...
    def where_expr(self, expr: PyExpr): ...
    def select(self, columns: List[Tuple[str, str]]): ...
    def select_expr(self, columns: List[Tuple[str, PyExpr]]): ...
    def limit(self, limit: int): ...
    def offset(self, offset: int): ...
    def fast_search(self): ...
    def with_row_id(self): ...
    def postfilter(self): ...
    def get_query(self) -> str: ...
    def add_query_vector(self, query_vec: pa.Array) -> None: ...
    def nearest_to(self, query_vec: pa.Array) -> HybridQuery: ...
    def order_by(self, ordering: Optional[List[ColumnOrdering]]): ...
    async def output_schema(self) -> pa.Schema: ...
    async def execute(
        self, max_batch_length: Optional[int], timeout: Optional[timedelta]
    ) -> RecordBatchStream: ...
    def to_query_request(self) -> PyQueryRequest: ...

class VectorQuery:
    async def output_schema(self) -> pa.Schema: ...
    async def execute(self) -> RecordBatchStream: ...
    def where(self, filter: str): ...
    def where_expr(self, expr: PyExpr): ...
    def select(self, columns: List[Tuple[str, str]]): ...
    def select_expr(self, columns: List[Tuple[str, PyExpr]]): ...
    def select_with_projection(self, columns: Tuple[str, str]): ...
    def limit(self, limit: int): ...
    def offset(self, offset: int): ...
    def column(self, column: str): ...
    def distance_type(self, distance_type: str): ...
    def postfilter(self): ...
    def refine_factor(self, refine_factor: int): ...
    def nprobes(self, nprobes: int): ...
    def minimum_nprobes(self, minimum_nprobes: int): ...
    def maximum_nprobes(self, maximum_nprobes: int): ...
    def bypass_vector_index(self): ...
    def nearest_to_text(self, query: dict) -> HybridQuery: ...
    def order_by(self, ordering: Optional[List[ColumnOrdering]]): ...
    def to_query_request(self) -> PyQueryRequest: ...

class HybridQuery:
    def where(self, filter: str): ...
    def where_expr(self, expr: PyExpr): ...
    def select(self, columns: List[Tuple[str, str]]): ...
    def select_expr(self, columns: List[Tuple[str, PyExpr]]): ...
    def limit(self, limit: int): ...
    def offset(self, offset: int): ...
    def fast_search(self): ...
    def with_row_id(self): ...
    def postfilter(self): ...
    def distance_type(self, distance_type: str): ...
    def refine_factor(self, refine_factor: int): ...
    def nprobes(self, nprobes: int): ...
    def minimum_nprobes(self, minimum_nprobes: int): ...
    def maximum_nprobes(self, maximum_nprobes: int): ...
    def bypass_vector_index(self): ...
    def order_by(self, ordering: Optional[List[ColumnOrdering]]): ...
    def to_vector_query(self) -> VectorQuery: ...
    def to_fts_query(self) -> FTSQuery: ...
    def get_limit(self) -> int: ...
    def get_with_row_id(self) -> bool: ...
    def to_query_request(self) -> PyQueryRequest: ...

class FullTextQuery:
    pass

class PyQueryRequest:
    limit: Optional[int]
    offset: Optional[int]
    filter: Optional[Union[str, bytes]]
    full_text_search: Optional[FullTextQuery]
    select: Optional[Union[str, List[str]]]
    fast_search: Optional[bool]
    with_row_id: Optional[bool]
    column: Optional[str]
    query_vector: Optional[List[pa.Array]]
    minimum_nprobes: Optional[int]
    maximum_nprobes: Optional[int]
    lower_bound: Optional[float]
    upper_bound: Optional[float]
    ef: Optional[int]
    refine_factor: Optional[int]
    distance_type: Optional[str]
    bypass_vector_index: Optional[bool]
    postfilter: Optional[bool]
    norm: Optional[str]
    order_by: Optional[List[ColumnOrdering]]

class CompactionStats:
    fragments_removed: int
    fragments_added: int
    files_removed: int
    files_added: int

class CleanupStats:
    bytes_removed: int
    old_versions: int

class RemovalStats:
    bytes_removed: int
    old_versions_removed: int

class OptimizeStats:
    compaction: CompactionStats
    prune: RemovalStats

class Tag(TypedDict):
    version: int
    manifest_size: int

class AddResult:
    version: int

class DeleteResult:
    version: int

class UpdateResult:
    rows_updated: int
    version: int

class MergeResult:
    version: int
    num_updated_rows: int
    num_inserted_rows: int
    num_deleted_rows: int
    num_attempts: int
    num_rows: int

class LsmWriteSpec:
    """Specification selecting Lance's MemWAL LSM-style write path for
    `merge_insert`."""

    @staticmethod
    def bucket(column: str, num_buckets: int) -> "LsmWriteSpec": ...
    @staticmethod
    def identity(column: str) -> "LsmWriteSpec": ...
    @staticmethod
    def unsharded() -> "LsmWriteSpec": ...
    def with_maintained_indexes(self, indexes: List[str]) -> "LsmWriteSpec":
        """Return a copy of this spec asking the MemWAL to keep the named
        indexes up to date as rows are appended."""
        ...
    def with_writer_config_defaults(self, defaults: Dict[str, str]) -> "LsmWriteSpec":
        """Return a copy of this spec recording the given default
        `ShardWriter` configuration in the MemWAL index."""
        ...
    @property
    def spec_type(self) -> str:
        """One of 'bucket', 'identity', or 'unsharded'."""
        ...
    @property
    def column(self) -> Optional[str]: ...
    @property
    def num_buckets(self) -> Optional[int]: ...
    @property
    def maintained_indexes(self) -> List[str]: ...
    @property
    def writer_config_defaults(self) -> Dict[str, str]: ...

class AddColumnsResult:
    version: int

class AlterColumnsResult:
    version: int

class UpdateFieldMetadataResult:
    version: int

class DropColumnsResult:
    version: int

class AsyncPermutationBuilder:
    def select(self, projections: Dict[str, str]) -> "AsyncPermutationBuilder": ...
    def split_random(
        self,
        *,
        ratios: Optional[List[float]] = None,
        counts: Optional[List[int]] = None,
        fixed: Optional[int] = None,
        seed: Optional[int] = None,
    ) -> "AsyncPermutationBuilder": ...
    def split_hash(
        self, columns: List[str], split_weights: List[int], *, discard_weight: int = 0
    ) -> "AsyncPermutationBuilder": ...
    def split_sequential(
        self,
        *,
        ratios: Optional[List[float]] = None,
        counts: Optional[List[int]] = None,
        fixed: Optional[int] = None,
    ) -> "AsyncPermutationBuilder": ...
    def split_calculated(self, calculation: str) -> "AsyncPermutationBuilder": ...
    def shuffle(
        self, seed: Optional[int], clump_size: Optional[int]
    ) -> "AsyncPermutationBuilder": ...
    def filter(self, filter: str) -> "AsyncPermutationBuilder": ...
    async def execute(self) -> Table: ...

def async_permutation_builder(
    table: Table,
) -> AsyncPermutationBuilder: ...
def fts_query_to_json(query: Any) -> str: ...

class PermutationReader:
    def __init__(self, base_table: Table, permutation_table: Table): ...