mirror of
https://github.com/lancedb/lancedb.git
synced 2026-07-03 11:00:40 +00:00
job wait(): poll by id via get_job (point access) instead of list_jobs
JobHandle/AsyncJobHandle now poll conn.get_job(id, table) -- one job -- instead
of list_jobs() + client-side filter over every active job. The job's table is
threaded in from refresh_column / MV refresh as an O(1) lookup hint. Plumbs
get_job through the Database trait (default not_supported), RemoteDatabase
(GET /v1/job/{id}?table=...), the Connection wrapper, and the pyo3 binding +
db.py.
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -784,6 +784,15 @@ class DBConnection(EnforceOverrides):
|
||||
"""List inflight server-side jobs across the database's tables."""
|
||||
return LOOP.run(self._conn.list_jobs())
|
||||
|
||||
def get_job(self, job_id: str, table: "str | None" = None):
|
||||
"""Look up one server-side job by id (the wait()/status poll path).
|
||||
|
||||
Passing ``table`` (the job's table) lets the server answer with an O(1)
|
||||
single-node read instead of scanning the database's active jobs.
|
||||
Returns the job's status, or None if it's unknown or no longer active.
|
||||
"""
|
||||
return LOOP.run(self._conn.get_job(job_id, table))
|
||||
|
||||
def cancel_job(self, job_id: str) -> bool:
|
||||
"""Cancel an inflight server-side job by id (CANCEL JOB).
|
||||
|
||||
@@ -2182,6 +2191,12 @@ class AsyncConnection(object):
|
||||
"""List inflight server-side jobs across the database's tables."""
|
||||
return await self._inner.list_jobs()
|
||||
|
||||
async def get_job(self, job_id: str, table: "str | None" = None):
|
||||
"""Look up one server-side job by id (the wait()/status poll path).
|
||||
``table`` (the job's table) enables an O(1) server-side lookup.
|
||||
Returns the job's status, or None if unknown / no longer active."""
|
||||
return await self._inner.get_job(job_id, table)
|
||||
|
||||
async def cancel_job(self, job_id: str) -> bool:
|
||||
"""Cancel an inflight server-side job by id (CANCEL JOB).
|
||||
|
||||
|
||||
@@ -3842,7 +3842,7 @@ class LanceTable(Table):
|
||||
priority=priority,
|
||||
)
|
||||
)
|
||||
return JobHandle(self._conn, job_id)
|
||||
return JobHandle(self._conn, job_id, table=self.name)
|
||||
|
||||
def alter_columns(
|
||||
self, *alterations: Iterable[Dict[str, str]]
|
||||
|
||||
@@ -510,7 +510,9 @@ class MaterializedView:
|
||||
A no-op when the view was created with no data."""
|
||||
if self.job_id is None:
|
||||
return "finished"
|
||||
return JobHandle(self.conn, self.job_id).wait(timeout=timeout, poll=poll)
|
||||
return JobHandle(self.conn, self.job_id, table=self.name).wait(
|
||||
timeout=timeout, poll=poll
|
||||
)
|
||||
|
||||
def refresh(self, full: bool = False) -> "JobHandle":
|
||||
"""Refresh the materialized view; returns a `JobHandle` to wait on,
|
||||
@@ -521,7 +523,7 @@ class MaterializedView:
|
||||
the view's indexes -- they are reindexed by the distributed indexer.
|
||||
"""
|
||||
job_id = self.conn._refresh_materialized_view(self.name, full=full)
|
||||
return JobHandle(self.conn, job_id)
|
||||
return JobHandle(self.conn, job_id, table=self.name)
|
||||
|
||||
def explain_refresh(self, full: bool = False):
|
||||
"""Plan a refresh without running it (EXPLAIN REFRESH)."""
|
||||
@@ -574,20 +576,20 @@ class JobHandle:
|
||||
#: -> agent cycle -> manifest write is async).
|
||||
GRACE_SECONDS = 20.0
|
||||
|
||||
def __init__(self, conn, job_id: str):
|
||||
def __init__(self, conn, job_id: str, table: "str | None" = None):
|
||||
self.conn = conn
|
||||
self.id = job_id
|
||||
#: The job's table, when known (refresh_column / MV refresh). Lets the
|
||||
#: server resolve this job with an O(1) single-node read; without it the
|
||||
#: lookup scans the database's active jobs (still correct).
|
||||
self.table = table
|
||||
self._created = time.monotonic()
|
||||
self._seen = False
|
||||
|
||||
def _matches(self, listed_id: str) -> bool:
|
||||
return _job_id_matches(self.id, listed_id)
|
||||
|
||||
def _job(self):
|
||||
for j in self.conn.list_jobs():
|
||||
if self._matches(j.job_id):
|
||||
return j
|
||||
return None
|
||||
# Poll by id (one job), not list_jobs (every active job): the server
|
||||
# matches the submission/manifest id and reads just this table's node.
|
||||
return self.conn.get_job(self.id, self.table)
|
||||
|
||||
def status(self) -> str:
|
||||
"""pending / running / cancelling / stale, or 'finished' once the
|
||||
@@ -643,7 +645,7 @@ class AsyncMaterializedView:
|
||||
A no-op when the view was created with no data."""
|
||||
if self.job_id is None:
|
||||
return "finished"
|
||||
return await AsyncJobHandle(self.conn, self.job_id).wait(
|
||||
return await AsyncJobHandle(self.conn, self.job_id, table=self.name).wait(
|
||||
timeout=timeout, poll=poll
|
||||
)
|
||||
|
||||
@@ -655,7 +657,7 @@ class AsyncMaterializedView:
|
||||
(indexes are preserved and reindexed by the distributed indexer).
|
||||
"""
|
||||
job_id = await self.conn._refresh_materialized_view(self.name, full=full)
|
||||
return AsyncJobHandle(self.conn, job_id)
|
||||
return AsyncJobHandle(self.conn, job_id, table=self.name)
|
||||
|
||||
async def explain_refresh(self, full: bool = False):
|
||||
return await self.conn.explain_refresh_materialized_view(self.name, full=full)
|
||||
@@ -678,17 +680,17 @@ class AsyncJobHandle:
|
||||
|
||||
GRACE_SECONDS = 20.0
|
||||
|
||||
def __init__(self, conn, job_id: str):
|
||||
def __init__(self, conn, job_id: str, table: "str | None" = None):
|
||||
self.conn = conn
|
||||
self.id = job_id
|
||||
#: See JobHandle.table -- enables an O(1) by-id lookup when known.
|
||||
self.table = table
|
||||
self._created = time.monotonic()
|
||||
self._seen = False
|
||||
|
||||
async def _job(self):
|
||||
for j in await self.conn.list_jobs():
|
||||
if _job_id_matches(self.id, j.job_id):
|
||||
return j
|
||||
return None
|
||||
# Poll by id, not list_jobs (see JobHandle._job).
|
||||
return await self.conn.get_job(self.id, self.table)
|
||||
|
||||
async def status(self) -> str:
|
||||
job = await self._job()
|
||||
|
||||
@@ -582,6 +582,35 @@ impl Connection {
|
||||
})
|
||||
}
|
||||
|
||||
#[pyo3(signature = (job_id, table=None))]
|
||||
pub fn get_job(
|
||||
self_: PyRef<'_, Self>,
|
||||
job_id: String,
|
||||
table: Option<String>,
|
||||
) -> PyResult<Bound<'_, PyAny>> {
|
||||
let inner = self_.get_inner()?.clone();
|
||||
future_into_py(self_.py(), async move {
|
||||
let job = inner
|
||||
.get_job(&job_id, table.as_deref())
|
||||
.await
|
||||
.infer_error()?;
|
||||
Ok(job.map(|j| JobInfo {
|
||||
table: j.table,
|
||||
job_id: j.job_id,
|
||||
job_type: j.job_type,
|
||||
state: j.state,
|
||||
column: j.column,
|
||||
age_seconds: j.age_seconds,
|
||||
command: j.command,
|
||||
units_done: j.units_done,
|
||||
units_total: j.units_total,
|
||||
committed: j.committed,
|
||||
rows_skipped: j.rows_skipped,
|
||||
error: j.error,
|
||||
}))
|
||||
})
|
||||
}
|
||||
|
||||
#[pyo3(signature = (cur_name, new_name, cur_namespace_path=None, new_namespace_path=None))]
|
||||
pub fn rename_table(
|
||||
self_: PyRef<'_, Self>,
|
||||
|
||||
Reference in New Issue
Block a user