from datetime import timedelta
from typing import Dict, List, Optional, Tuple, Any, TypedDict, Union, Literal

import pyarrow as pa

from .index import BTree, IvfFlat, IvfPq, Bitmap, LabelList, HnswPq, HnswSq, FTS
from .io import StorageOptionsProvider
from .remote import ClientConfig

class Session:
    def __init__(
        self,
        index_cache_size_bytes: Optional[int] = None,
        metadata_cache_size_bytes: Optional[int] = None,
    ): ...
    @staticmethod
    def default() -> "Session": ...
    @property
    def size_bytes(self) -> int: ...
    @property
    def approx_num_items(self) -> int: ...

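# Usage sketch (illustrative; the byte sizes are assumptions): a Session
# bundles cache limits and can be handed to connect() via its session
# parameter.
#
#     session = Session(
#         index_cache_size_bytes=512 * 1024 * 1024,    # 512 MiB
#         metadata_cache_size_bytes=64 * 1024 * 1024,  # 64 MiB
#     )
#     print(session.size_bytes, session.approx_num_items)
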
class Connection(object):
    uri: str
    async def is_open(self): ...
    async def close(self): ...
    async def list_namespaces(
        self,
        namespace: List[str],
        page_token: Optional[str],
        limit: Optional[int],
    ) -> List[str]: ...
    async def create_namespace(self, namespace: List[str]) -> None: ...
    async def drop_namespace(self, namespace: List[str]) -> None: ...
    async def table_names(
        self,
        namespace: List[str],
        start_after: Optional[str],
        limit: Optional[int],
    ) -> list[str]: ...
    async def create_table(
        self,
        name: str,
        mode: str,
        data: pa.RecordBatchReader,
        namespace: List[str] = [],
        storage_options: Optional[Dict[str, str]] = None,
        storage_options_provider: Optional[StorageOptionsProvider] = None,
        location: Optional[str] = None,
    ) -> Table: ...
    async def create_empty_table(
        self,
        name: str,
        mode: str,
        schema: pa.Schema,
        namespace: List[str] = [],
        storage_options: Optional[Dict[str, str]] = None,
        storage_options_provider: Optional[StorageOptionsProvider] = None,
        location: Optional[str] = None,
    ) -> Table: ...
    async def open_table(
        self,
        name: str,
        namespace: List[str] = [],
        storage_options: Optional[Dict[str, str]] = None,
        storage_options_provider: Optional[StorageOptionsProvider] = None,
        index_cache_size: Optional[int] = None,
        location: Optional[str] = None,
    ) -> Table: ...
    async def clone_table(
        self,
        target_table_name: str,
        source_uri: str,
        target_namespace: List[str] = [],
        source_version: Optional[int] = None,
        source_tag: Optional[str] = None,
        is_shallow: bool = True,
    ) -> Table: ...
    async def rename_table(
        self,
        cur_name: str,
        new_name: str,
        cur_namespace: List[str] = [],
        new_namespace: List[str] = [],
    ) -> None: ...
    async def drop_table(self, name: str, namespace: List[str] = []) -> None: ...
    async def drop_all_tables(self, namespace: List[str] = []) -> None: ...

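# Usage sketch (illustrative; the table name, schema, and mode string are
# assumptions): creating an empty table from a PyArrow schema, then reopening
# it by name.
#
#     schema = pa.schema([pa.field("vector", pa.list_(pa.float32(), 128))])
#     tbl = await conn.create_empty_table("vectors", "create", schema)
#     tbl = await conn.open_table("vectors")
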
class Table:
    def name(self) -> str: ...
    def __repr__(self) -> str: ...
    def is_open(self) -> bool: ...
    def close(self) -> None: ...
    async def schema(self) -> pa.Schema: ...
    async def add(
        self, data: pa.RecordBatchReader, mode: Literal["append", "overwrite"]
    ) -> AddResult: ...
    async def update(
        self, updates: Dict[str, str], where: Optional[str]
    ) -> UpdateResult: ...
    async def count_rows(self, filter: Optional[str]) -> int: ...
    async def create_index(
        self,
        column: str,
        index: Union[IvfFlat, IvfPq, HnswPq, HnswSq, BTree, Bitmap, LabelList, FTS],
        replace: Optional[bool],
        wait_timeout: Optional[object],
        *,
        name: Optional[str],
        train: Optional[bool],
    ): ...
    async def list_versions(self) -> List[Dict[str, Any]]: ...
    async def version(self) -> int: ...
    async def checkout(self, version: Union[int, str]): ...
    async def checkout_latest(self): ...
    async def restore(self, version: Optional[Union[int, str]] = None): ...
    async def list_indices(self) -> list[IndexConfig]: ...
    async def delete(self, filter: str) -> DeleteResult: ...
    async def add_columns(self, columns: list[tuple[str, str]]) -> AddColumnsResult: ...
    async def add_columns_with_schema(self, schema: pa.Schema) -> AddColumnsResult: ...
    async def alter_columns(
        self, columns: list[dict[str, Any]]
    ) -> AlterColumnsResult: ...
    async def optimize(
        self,
        *,
        cleanup_since_ms: Optional[int] = None,
        delete_unverified: Optional[bool] = None,
    ) -> OptimizeStats: ...
    @property
    def tags(self) -> Tags: ...
    def query(self) -> Query: ...
    def take_offsets(self, offsets: list[int]) -> TakeQuery: ...
    def take_row_ids(self, row_ids: list[int]) -> TakeQuery: ...
    def vector_search(self) -> VectorQuery: ...

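# Usage sketch (illustrative; the schema, batch, and the no-argument BTree()
# constructor are assumptions): appending data, then building a scalar index
# with the positional-plus-keyword signature declared above.
#
#     reader = pa.RecordBatchReader.from_batches(schema, [batch])
#     result = await tbl.add(reader, "append")
#     await tbl.create_index("id", BTree(), None, None, name=None, train=None)
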
class Tags:
    async def list(self) -> Dict[str, Tag]: ...
    async def get_version(self, tag: str) -> int: ...
    async def create(self, tag: str, version: int): ...
    async def delete(self, tag: str): ...
    async def update(self, tag: str, version: int): ...

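# Usage sketch (illustrative; the tag name is an assumption): tags pin a
# version under a stable name, and Table.checkout accepts either form.
#
#     v = await tbl.version()
#     await tbl.tags.create("baseline", v)
#     await tbl.checkout("baseline")
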
class IndexConfig:
    name: str
    index_type: str
    columns: List[str]

async def connect(
    uri: str,
    api_key: Optional[str],
    region: Optional[str],
    host_override: Optional[str],
    read_consistency_interval: Optional[float],
    client_config: Optional[Union[ClientConfig, Dict[str, Any]]],
    storage_options: Optional[Dict[str, str]],
    session: Optional[Session],
) -> Connection: ...

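# Usage sketch (illustrative; the URI is an assumption): none of the
# parameters has a default, so unused ones are passed explicitly as None.
#
#     conn = await connect(
#         "./data/sample-lancedb",
#         None,  # api_key
#         None,  # region
#         None,  # host_override
#         None,  # read_consistency_interval
#         None,  # client_config
#         None,  # storage_options
#         None,  # session
#     )
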
class RecordBatchStream:
    @property
    def schema(self) -> pa.Schema: ...
    def __aiter__(self) -> "RecordBatchStream": ...
    async def __anext__(self) -> pa.RecordBatch: ...

class Query:
    def where(self, filter: str): ...
    def select(self, columns: Tuple[str, str]): ...
    def select_columns(self, columns: List[str]): ...
    def limit(self, limit: int): ...
    def offset(self, offset: int): ...
    def fast_search(self): ...
    def with_row_id(self): ...
    def postfilter(self): ...
    def nearest_to(self, query_vec: pa.Array) -> VectorQuery: ...
    def nearest_to_text(self, query: dict) -> FTSQuery: ...
    async def output_schema(self) -> pa.Schema: ...
    async def execute(
        self, max_batch_length: Optional[int], timeout: Optional[timedelta]
    ) -> RecordBatchStream: ...
    async def explain_plan(self, verbose: Optional[bool]) -> str: ...
    async def analyze_plan(self) -> str: ...
    def to_query_request(self) -> PyQueryRequest: ...

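# Usage sketch (illustrative; the filter and column names are assumptions, and
# the builder methods are assumed to mutate the query in place since the stub
# declares no return values for them):
#
#     q = tbl.query()
#     q.where("id > 100")
#     q.select_columns(["id", "vector"])
#     q.limit(10)
#     stream = await q.execute(None, None)  # max_batch_length, timeout
#     async for batch in stream:
#         print(batch.num_rows)
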
class TakeQuery:
    def select(self, columns: List[str]): ...
    def with_row_id(self): ...
    async def output_schema(self) -> pa.Schema: ...
    async def execute(self) -> RecordBatchStream: ...
    def to_query_request(self) -> PyQueryRequest: ...

class FTSQuery:
    def where(self, filter: str): ...
    def select(self, columns: List[str]): ...
    def limit(self, limit: int): ...
    def offset(self, offset: int): ...
    def fast_search(self): ...
    def with_row_id(self): ...
    def postfilter(self): ...
    def get_query(self) -> str: ...
    def add_query_vector(self, query_vec: pa.Array) -> None: ...
    def nearest_to(self, query_vec: pa.Array) -> HybridQuery: ...
    async def output_schema(self) -> pa.Schema: ...
    async def execute(
        self, max_batch_length: Optional[int], timeout: Optional[timedelta]
    ) -> RecordBatchStream: ...
    def to_query_request(self) -> PyQueryRequest: ...

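# Usage sketch (illustrative; the dict shape passed to nearest_to_text is an
# assumption, since the stub only declares a plain dict):
#
#     fts = tbl.query().nearest_to_text({"query": "puppy", "columns": ["text"]})
#     fts.limit(5)
#     stream = await fts.execute(None, None)
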
class VectorQuery:
    async def output_schema(self) -> pa.Schema: ...
    async def execute(self) -> RecordBatchStream: ...
    def where(self, filter: str): ...
    def select(self, columns: List[str]): ...
    def select_with_projection(self, columns: Tuple[str, str]): ...
    def limit(self, limit: int): ...
    def offset(self, offset: int): ...
    def column(self, column: str): ...
    def distance_type(self, distance_type: str): ...
    def postfilter(self): ...
    def refine_factor(self, refine_factor: int): ...
    def nprobes(self, nprobes: int): ...
    def minimum_nprobes(self, minimum_nprobes: int): ...
    def maximum_nprobes(self, maximum_nprobes: int): ...
    def bypass_vector_index(self): ...
    def nearest_to_text(self, query: dict) -> HybridQuery: ...
    def to_query_request(self) -> PyQueryRequest: ...

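# Usage sketch (illustrative; the vector values and tuning numbers are
# assumptions): an ANN search with cosine distance and a post-filter, using
# only the methods declared above.
#
#     vq = tbl.query().nearest_to(pa.array([0.1, 0.2, 0.3], pa.float32()))
#     vq.distance_type("cosine")
#     vq.nprobes(20)
#     vq.postfilter()
#     stream = await vq.execute()
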
class HybridQuery:
    def where(self, filter: str): ...
    def select(self, columns: List[str]): ...
    def limit(self, limit: int): ...
    def offset(self, offset: int): ...
    def fast_search(self): ...
    def with_row_id(self): ...
    def postfilter(self): ...
    def distance_type(self, distance_type: str): ...
    def refine_factor(self, refine_factor: int): ...
    def nprobes(self, nprobes: int): ...
    def minimum_nprobes(self, minimum_nprobes: int): ...
    def maximum_nprobes(self, maximum_nprobes: int): ...
    def bypass_vector_index(self): ...
    def to_vector_query(self) -> VectorQuery: ...
    def to_fts_query(self) -> FTSQuery: ...
    def get_limit(self) -> int: ...
    def get_with_row_id(self) -> bool: ...
    def to_query_request(self) -> PyQueryRequest: ...

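# Usage sketch (illustrative, continuing the FTSQuery sketch above; the vector
# is an assumption): a hybrid query layers vector search on top of full-text
# search and can be split back into its two halves for separate execution.
#
#     hq = fts.nearest_to(pa.array([0.1, 0.2, 0.3], pa.float32()))
#     vector_half = hq.to_vector_query()
#     fts_half = hq.to_fts_query()
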
class FullTextQuery:
    pass

class PyQueryRequest:
    limit: Optional[int]
    offset: Optional[int]
    filter: Optional[Union[str, bytes]]
    full_text_search: Optional[FullTextQuery]
    select: Optional[Union[str, List[str]]]
    fast_search: Optional[bool]
    with_row_id: Optional[bool]
    column: Optional[str]
    query_vector: Optional[List[pa.Array]]
    minimum_nprobes: Optional[int]
    maximum_nprobes: Optional[int]
    lower_bound: Optional[float]
    upper_bound: Optional[float]
    ef: Optional[int]
    refine_factor: Optional[int]
    distance_type: Optional[str]
    bypass_vector_index: Optional[bool]
    postfilter: Optional[bool]
    norm: Optional[str]

class CompactionStats:
    fragments_removed: int
    fragments_added: int
    files_removed: int
    files_added: int

class CleanupStats:
    bytes_removed: int
    old_versions: int

class RemovalStats:
    bytes_removed: int
    old_versions_removed: int

class OptimizeStats:
    compaction: CompactionStats
    prune: RemovalStats

class Tag(TypedDict):
    version: int
    manifest_size: int

class AddResult:
    version: int

class DeleteResult:
    version: int

class UpdateResult:
    rows_updated: int
    version: int

class MergeResult:
    version: int
    num_updated_rows: int
    num_inserted_rows: int
    num_deleted_rows: int
    # Piped through from Lance's merge-insert result so merge_insert callers
    # can tell whether transaction conflicts (and retries) are occurring.
    num_attempts: int

class AddColumnsResult:
    version: int

class AlterColumnsResult:
    version: int

class DropColumnsResult:
    version: int

class AsyncPermutationBuilder:
    def select(self, projections: Dict[str, str]) -> "AsyncPermutationBuilder": ...
    def split_random(
        self,
        *,
        ratios: Optional[List[float]] = None,
        counts: Optional[List[int]] = None,
        fixed: Optional[int] = None,
        seed: Optional[int] = None,
    ) -> "AsyncPermutationBuilder": ...
    def split_hash(
        self, columns: List[str], split_weights: List[int], *, discard_weight: int = 0
    ) -> "AsyncPermutationBuilder": ...
    def split_sequential(
        self,
        *,
        ratios: Optional[List[float]] = None,
        counts: Optional[List[int]] = None,
        fixed: Optional[int] = None,
    ) -> "AsyncPermutationBuilder": ...
    def split_calculated(self, calculation: str) -> "AsyncPermutationBuilder": ...
    def shuffle(
        self, seed: Optional[int], clump_size: Optional[int]
    ) -> "AsyncPermutationBuilder": ...
    def filter(self, filter: str) -> "AsyncPermutationBuilder": ...
    async def execute(self) -> Table: ...

def async_permutation_builder(
    table: Table, dest_table_name: str
) -> AsyncPermutationBuilder: ...
def fts_query_to_json(query: Any) -> str: ...

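# Usage sketch (illustrative; the destination table name, filter, ratios, and
# seed are assumptions): unlike the query builders, these methods return the
# builder, so calls chain.
#
#     builder = async_permutation_builder(tbl, "splits")
#     permutation = await (
#         builder.filter("label IS NOT NULL")
#         .split_random(ratios=[0.8, 0.2], seed=42)
#         .shuffle(42, None)  # seed, clump_size
#         .execute()
#     )
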
class PermutationReader:
    def __init__(self, base_table: Table, permutation_table: Table): ...