BREAKING CHANGE: Check if remote table exists when opening (with caching) (#1214)

- make open table behaviour consistent:
- remote tables will check if the table exists by calling /describe and
throwing an error if the call doesn't succeed
- this is similar to the behaviour for local tables where we will raise
an exception when opening the table if the local dataset doesn't exist
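- Names of tables known to exist are cached in the client with a TTL (300 seconds, at most 10,000 entries), so repeated opens of the same table skip the /describe call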
- Also fixes a small bug: when the remote error response was
deserialized from JSON as an object, the object itself was interpolated
into the message, producing the unhelpful error:
`Error: Server Error, status: 404, message: Not Found: [object Object]`
This commit is contained in:
Bert
2024-04-10 11:54:47 -04:00
committed by GitHub
parent 8a1227030a
commit 25dea4e859
5 changed files with 76 additions and 13 deletions

View File

@@ -18,6 +18,7 @@ from concurrent.futures import ThreadPoolExecutor
from typing import Iterable, List, Optional, Union
from urllib.parse import urlparse
from cachetools import TTLCache
import pyarrow as pa
from overrides import override
@@ -29,7 +30,6 @@ from ..table import Table, _sanitize_data
from ..util import validate_table_name
from .arrow import to_ipc_binary
from .client import ARROW_STREAM_CONTENT_TYPE, RestfulLanceDBClient
from .errors import LanceDBClientError
class RemoteDBConnection(DBConnection):
@@ -60,6 +60,7 @@ class RemoteDBConnection(DBConnection):
read_timeout=read_timeout,
)
self._request_thread_pool = request_thread_pool
self._table_cache = TTLCache(maxsize=10000, ttl=300)
def __repr__(self) -> str:
return f"RemoteConnect(name={self.db_name})"
@@ -89,6 +90,7 @@ class RemoteDBConnection(DBConnection):
else:
break
for item in result:
self._table_cache[item] = True
yield item
@override
@@ -109,16 +111,10 @@ class RemoteDBConnection(DBConnection):
self._client.mount_retry_adapter_for_table(name)
# check if table exists
try:
if self._table_cache.get(name) is None:
self._client.post(f"/v1/table/{name}/describe/")
except LanceDBClientError as err:
if str(err).startswith("Not found"):
logging.error(
"Table %s does not exist. Please first call "
"db.create_table(%s, data).",
name,
name,
)
self._table_cache[name] = True
return RemoteTable(self, name)
@override
@@ -267,6 +263,7 @@ class RemoteDBConnection(DBConnection):
content_type=ARROW_STREAM_CONTENT_TYPE,
)
self._table_cache[name] = True
return RemoteTable(self, name)
@override
@@ -282,6 +279,7 @@ class RemoteDBConnection(DBConnection):
self._client.post(
    f"/v1/table/{name}/drop/",
)
# Evict the name so a later open re-checks existence with the server.
# A default is passed because the entry is absent whenever this client
# never cached the name or its TTL entry has already expired; a bare
# pop(name) would then raise KeyError even though the drop request above
# had already gone through.
self._table_cache.pop(name, None)
async def close(self):
"""Close the connection to the database."""