Compare commits

..

3 Commits

Author SHA1 Message Date
ayush chaurasia
40ffe03cc8 format 2024-06-24 16:57:51 +05:30
ayush chaurasia
617ce3139b lint 2024-06-24 16:55:23 +05:30
ayush chaurasia
242bbe1897 use promote_options with concat_tables 2024-06-24 16:39:03 +05:30
9 changed files with 17 additions and 40 deletions

View File

@@ -20,11 +20,13 @@ keywords = ["lancedb", "lance", "database", "vector", "search"]
categories = ["database-implementations"]
[workspace.dependencies]
lance = { "version" = "=0.13.0", "features" = ["dynamodb"] }
lance-index = { "version" = "=0.13.0" }
lance-linalg = { "version" = "=0.13.0" }
lance-testing = { "version" = "=0.13.0" }
lance-datafusion = { "version" = "=0.13.0" }
lance = { "version" = "=0.12.2", "features" = [
"dynamodb",
], git = "https://github.com/lancedb/lance.git", tag = "v0.12.2-beta.2" }
lance-index = { "version" = "=0.12.2", git = "https://github.com/lancedb/lance.git", tag = "v0.12.2-beta.2" }
lance-linalg = { "version" = "=0.12.2", git = "https://github.com/lancedb/lance.git", tag = "v0.12.2-beta.2" }
lance-testing = { "version" = "=0.12.2", git = "https://github.com/lancedb/lance.git", tag = "v0.12.2-beta.2" }
lance-datafusion = { "version" = "=0.12.2", git = "https://github.com/lancedb/lance.git", tag = "v0.12.2-beta.2" }
# Note that this one does not include pyarrow
arrow = { version = "51.0", optional = false }
arrow-array = "51.0"

View File

@@ -1,5 +1,5 @@
[tool.bumpversion]
current_version = "0.9.0-beta.3"
current_version = "0.8.2"
parse = """(?x)
(?P<major>0|[1-9]\\d*)\\.
(?P<minor>0|[1-9]\\d*)\\.

View File

@@ -1,6 +1,6 @@
[package]
name = "lancedb-python"
version = "0.9.0-beta.3"
version = "0.8.2"
edition.workspace = true
description = "Python bindings for LanceDB"
license.workspace = true

View File

@@ -3,7 +3,7 @@ name = "lancedb"
# version in Cargo.toml
dependencies = [
"deprecation",
"pylance==0.13.0",
"pylance==0.12.2-beta.2",
"ratelimiter~=1.0",
"requests>=2.31.0",
"retry>=0.9.2",
@@ -13,7 +13,6 @@ dependencies = [
"packaging",
"cachetools",
"overrides>=0.7",
"urllib3==1.26.19"
]
description = "lancedb"
authors = [{ name = "LanceDB Devs", email = "dev@lancedb.com" }]

View File

@@ -35,7 +35,6 @@ def connect(
host_override: Optional[str] = None,
read_consistency_interval: Optional[timedelta] = None,
request_thread_pool: Optional[Union[int, ThreadPoolExecutor]] = None,
storage_options: Optional[Dict[str, str]] = None,
**kwargs,
) -> DBConnection:
"""Connect to a LanceDB database.
@@ -71,9 +70,6 @@ def connect(
executor will be used for making requests. This is for LanceDB Cloud
only and is only used when making batch requests (i.e., passing in
multiple queries to the search method at once).
storage_options: dict, optional
Additional options for the storage backend. See available options at
https://lancedb.github.io/lancedb/guides/storage/
Examples
--------
@@ -109,16 +105,12 @@ def connect(
region,
host_override,
request_thread_pool=request_thread_pool,
storage_options=storage_options,
**kwargs,
)
if kwargs:
raise ValueError(f"Unknown keyword arguments: {kwargs}")
return LanceDBConnection(
uri,
read_consistency_interval=read_consistency_interval,
)
return LanceDBConnection(uri, read_consistency_interval=read_consistency_interval)
async def connect_async(

View File

@@ -55,13 +55,11 @@ class RestfulLanceDBClient:
region: str
api_key: Credential
host_override: Optional[str] = attrs.field(default=None)
db_prefix: Optional[str] = attrs.field(default=None)
closed: bool = attrs.field(default=False, init=False)
connection_timeout: float = attrs.field(default=120.0, kw_only=True)
read_timeout: float = attrs.field(default=300.0, kw_only=True)
storage_options: Optional[Dict[str, str]] = attrs.field(default=None, kw_only=True)
@functools.cached_property
def session(self) -> requests.Session:
@@ -94,18 +92,6 @@ class RestfulLanceDBClient:
headers["Host"] = f"{self.db_name}.{self.region}.api.lancedb.com"
if self.host_override:
headers["x-lancedb-database"] = self.db_name
if self.storage_options:
if self.storage_options.get("account_name") is not None:
headers["x-azure-storage-account-name"] = self.storage_options[
"account_name"
]
if self.storage_options.get("azure_storage_account_name") is not None:
headers["x-azure-storage-account-name"] = self.storage_options[
"azure_storage_account_name"
]
if self.db_prefix:
headers["x-lancedb-database-prefix"] = self.db_prefix
return headers
@staticmethod
@@ -259,6 +245,7 @@ def retry_adapter(options: Dict[str, Any]) -> HTTPAdapter:
connect=connect_retries,
read=read_retries,
backoff_factor=backoff_factor,
backoff_jitter=backoff_jitter,
status_forcelist=statuses,
allowed_methods=methods,
)

View File

@@ -15,7 +15,7 @@ import inspect
import logging
import uuid
from concurrent.futures import ThreadPoolExecutor
from typing import Dict, Iterable, List, Optional, Union
from typing import Iterable, List, Optional, Union
from urllib.parse import urlparse
from cachetools import TTLCache
@@ -44,25 +44,20 @@ class RemoteDBConnection(DBConnection):
request_thread_pool: Optional[ThreadPoolExecutor] = None,
connection_timeout: float = 120.0,
read_timeout: float = 300.0,
storage_options: Optional[Dict[str, str]] = None,
):
"""Connect to a remote LanceDB database."""
parsed = urlparse(db_url)
if parsed.scheme != "db":
raise ValueError(f"Invalid scheme: {parsed.scheme}, only accepts db://")
self.db_name = parsed.netloc
prefix = parsed.path.lstrip("/")
self.db_prefix = None if not prefix else prefix
self.api_key = api_key
self._client = RestfulLanceDBClient(
self.db_name,
region,
api_key,
host_override,
self.db_prefix,
connection_timeout=connection_timeout,
read_timeout=read_timeout,
storage_options=storage_options,
)
self._request_thread_pool = request_thread_pool
self._table_cache = TTLCache(maxsize=10000, ttl=300)

View File

@@ -119,7 +119,9 @@ class Reranker(ABC):
fts_results : pa.Table
The results from the FTS search
"""
combined = pa.concat_tables([vector_results, fts_results], promote=True)
combined = pa.concat_tables(
[vector_results, fts_results], promote_options="default"
)
row_id = combined.column("_rowid")
# deduplicate

View File

@@ -735,7 +735,7 @@ def test_create_scalar_index(db):
indices = table.to_lance().list_indices()
assert len(indices) == 1
scalar_index = indices[0]
assert scalar_index["type"] == "BTree"
assert scalar_index["type"] == "Scalar"
# Confirm that prefiltering still works with the scalar index column
results = table.search().where("x = 'c'").to_arrow()