diff --git a/python/python/tests/test_index.py b/python/python/tests/test_index.py index ab5fd46f3..65a244c5b 100644 --- a/python/python/tests/test_index.py +++ b/python/python/tests/test_index.py @@ -91,7 +91,9 @@ async def test_create_scalar_index(some_table: AsyncTable): # Can recreate if replace=True await some_table.create_index("id", replace=True) indices = await some_table.list_indices() - assert str(indices) == '[Index(BTree, columns=["id"], name="id_idx")]' + assert str(indices).startswith( + '[IndexConfig(name="id_idx", index_type="BTree", columns=["id"]' + ) assert len(indices) == 1 assert indices[0].index_type == "BTree" assert indices[0].columns == ["id"] @@ -106,6 +108,27 @@ async def test_create_scalar_index(some_table: AsyncTable): assert len(indices) == 0 +@pytest.mark.asyncio +async def test_index_config_repr(db_async): + # Use >= 1000 rows so the thousands separator in the repr is exercised. + nrows = 1500 + table = await db_async.create_table( + "repr_table", pa.Table.from_pydict({"id": list(range(nrows))}) + ) + await table.create_index("id", config=BTree()) + indices = await table.list_indices() + assert len(indices) == 1 + + r = repr(indices[0]) + assert r.startswith('IndexConfig(name="id_idx", index_type="BTree", columns=["id"]') + # Integer counts use `_` thousands separators (valid Python int syntax). + assert "num_indexed_rows=1_500" in r + assert "num_unindexed_rows=0" in r + # created_at renders as a datetime so the value round-trips. + assert "created_at=datetime.datetime(" in r + assert r.endswith(")") + + @pytest.mark.asyncio async def test_create_nested_scalar_index_lists_canonical_paths(db_async): metadata_type = pa.struct( @@ -198,7 +221,9 @@ async def test_create_nested_scalar_index_lists_canonical_paths(db_async): async def test_create_fixed_size_binary_index(some_table: AsyncTable): await some_table.create_index("fsb", config=BTree()) indices = await some_table.list_indices() - assert str(indices) == '[Index(BTree, columns=["fsb"], name="fsb_idx")]' + assert str(indices).startswith( + '[IndexConfig(name="fsb_idx", index_type="BTree", columns=["fsb"]' + ) assert len(indices) == 1 assert indices[0].index_type == "BTree" assert indices[0].columns == ["fsb"] @@ -247,7 +272,9 @@ async def test_create_bitmap_index(some_table: AsyncTable): async def test_create_label_list_index(some_table: AsyncTable): await some_table.create_index("tags", config=LabelList()) indices = await some_table.list_indices() - assert str(indices) == '[Index(LabelList, columns=["tags"], name="tags_idx")]' + assert str(indices).startswith( + '[IndexConfig(name="tags_idx", index_type="LabelList", columns=["tags"]' + ) plan = await some_table.query().where("array_has(tags, 'tag0')").explain_plan() assert "ScalarIndexQuery" in plan @@ -262,7 +289,9 @@ async def test_create_large_list_label_list_index(db_async): await table.create_index("tags", config=LabelList()) indices = await table.list_indices() - assert str(indices) == '[Index(LabelList, columns=["tags"], name="tags_idx")]' + assert str(indices).startswith( + '[IndexConfig(name="tags_idx", index_type="LabelList", columns=["tags"]' + ) plan = await table.query().where("array_has(tags, 'shared')").explain_plan() assert "ScalarIndexQuery" in plan @@ -299,7 +328,9 @@ async def test_create_label_list_index_rejects_list_struct(db_async): async def test_full_text_search_index(some_table: AsyncTable): await some_table.create_index("tags", config=FTS(with_position=False)) indices = await some_table.list_indices() - assert str(indices) == '[Index(FTS, columns=["tags"], name="tags_idx")]' + assert str(indices).startswith( + '[IndexConfig(name="tags_idx", index_type="FTS", columns=["tags"]' + ) await some_table.prewarm_index("tags_idx") diff --git a/python/src/index.rs b/python/src/index.rs index 50407dd21..121d3875d 100644 --- a/python/src/index.rs +++ b/python/src/index.rs @@ -319,11 +319,53 @@ pub struct IndexConfig { #[pymethods] impl IndexConfig { - pub fn __repr__(&self) -> String { - format!( - "Index({}, columns={:?}, name=\"{}\")", - self.index_type, self.columns, self.name - ) + pub fn __repr__(&self, py: Python<'_>) -> String { + let mut fields = vec![ + format!("name={:?}", self.name), + format!("index_type={:?}", self.index_type), + format!("columns={:?}", self.columns), + ]; + if let Some(v) = &self.index_uuid { + fields.push(format!("index_uuid={:?}", v)); + } + if let Some(v) = &self.type_url { + fields.push(format!("type_url={:?}", v)); + } + if let Some(v) = self.created_at { + // Render the datetime's own Python repr so the value round-trips, + // falling back to RFC 3339 if the conversion ever fails. + let rendered = v + .into_pyobject(py) + .ok() + .and_then(|obj| obj.into_any().repr().ok()) + .map(|r| r.to_string()) + .unwrap_or_else(|| v.to_rfc3339()); + fields.push(format!("created_at={}", rendered)); + } + if let Some(v) = self.num_indexed_rows { + fields.push(format!("num_indexed_rows={}", fmt_thousands(v))); + } + if let Some(v) = self.num_unindexed_rows { + fields.push(format!("num_unindexed_rows={}", fmt_thousands(v))); + } + if let Some(v) = self.size_bytes { + fields.push(format!("size_bytes={}", fmt_thousands(v))); + } + if let Some(v) = self.num_segments { + fields.push(format!("num_segments={}", v)); + } + if let Some(v) = self.index_version { + fields.push(format!("index_version={}", v)); + } + if let Some(v) = &self.index_details { + let details = v + .bind(py) + .repr() + .map(|r| r.to_string()) + .unwrap_or_else(|_| "".to_string()); + fields.push(format!("index_details={}", details)); + } + format!("IndexConfig({})", fields.join(", ")) } // For backwards-compatibility with the old sync SDK, we also support getting @@ -352,6 +394,23 @@ impl IndexConfig { } } +/// Format an integer with `_` thousands separators, e.g. `24_500_213`. +/// +/// Underscores are valid Python int-literal syntax, so the repr stays +/// copy-pasteable and machine-parseable while remaining readable. +fn fmt_thousands(n: u64) -> String { + let digits = n.to_string(); + let bytes = digits.as_bytes(); + let mut out = String::with_capacity(digits.len() + digits.len() / 3); + for (i, b) in bytes.iter().enumerate() { + if i > 0 && (bytes.len() - i).is_multiple_of(3) { + out.push('_'); + } + out.push(*b as char); + } + out +} + fn parse_index_details(py: Python<'_>, s: String) -> Py { let json = py.import("json").expect("json module is always available"); match json.call_method1("loads", (s.as_str(),)) { diff --git a/rust/lancedb/src/remote/table.rs b/rust/lancedb/src/remote/table.rs index 0e016cc4c..f3976d4af 100644 --- a/rust/lancedb/src/remote/table.rs +++ b/rust/lancedb/src/remote/table.rs @@ -1352,6 +1352,35 @@ impl RemoteTable { } } +/// Deserialize an index's `created_at` field. +/// +/// The server returns this as an RFC 3339 string (e.g. `"2026-06-18T21:37:36.637Z"`), +/// but older deployments sent a unix timestamp in milliseconds. Accept both so the +/// client works against any server version. +fn deserialize_created_at<'de, D>( + deserializer: D, +) -> std::result::Result>, D::Error> +where + D: serde::Deserializer<'de>, +{ + use serde::de::Error as _; + + #[derive(Deserialize)] + #[serde(untagged)] + enum CreatedAt { + Rfc3339(String), + Millis(i64), + } + + match Option::::deserialize(deserializer)? { + None => Ok(None), + Some(CreatedAt::Rfc3339(s)) => DateTime::parse_from_rfc3339(&s) + .map(|dt| Some(dt.with_timezone(&Utc))) + .map_err(D::Error::custom), + Some(CreatedAt::Millis(ms)) => Ok(DateTime::from_timestamp_millis(ms)), + } +} + impl RemoteTable { /// Parse the response from `/index/list/` into `IndexConfig` entries. /// @@ -1380,7 +1409,7 @@ impl RemoteTable { // Used as the sentinel to decide whether to skip the stats call. index_type: Option, index_uuid: Option, - #[serde(default, with = "chrono::serde::ts_milliseconds_option")] + #[serde(default, deserialize_with = "deserialize_created_at")] created_at: Option>, num_indexed_rows: Option, num_unindexed_rows: Option, @@ -4678,7 +4707,7 @@ mod tests { "num_segments": 2, "index_version": 1, "index_details": "{\"num_partitions\":16}", - "created_at": 1700000000000i64, + "created_at": "2026-06-18T21:37:36.637Z", "type_url": "type.googleapis.com/lance.index.vector.IvfPq", }, { @@ -4728,7 +4757,10 @@ mod tests { vec_idx.type_url, Some("type.googleapis.com/lance.index.vector.IvfPq".to_string()) ); - assert!(vec_idx.created_at.is_some()); + assert_eq!( + vec_idx.created_at, + Some("2026-06-18T21:37:36.637Z".parse::>().unwrap()) + ); let text_idx = &indices[1]; assert_eq!(text_idx.name, "text_idx"); @@ -4749,6 +4781,36 @@ mod tests { assert_eq!(text_idx.created_at, None); } + #[test] + fn test_deserialize_created_at() { + #[derive(Deserialize)] + struct Wrapper { + #[serde(default, deserialize_with = "deserialize_created_at")] + created_at: Option>, + } + + // RFC 3339 string (current server format). + let w: Wrapper = + serde_json::from_str(r#"{"created_at": "2026-06-18T21:37:36.637Z"}"#).unwrap(); + assert_eq!( + w.created_at, + Some("2026-06-18T21:37:36.637Z".parse::>().unwrap()) + ); + + // Unix milliseconds (legacy server format). + let w: Wrapper = serde_json::from_str(r#"{"created_at": 1700000000000}"#).unwrap(); + assert_eq!(w.created_at, DateTime::from_timestamp_millis(1700000000000)); + + // Null and missing both yield None. + let w: Wrapper = serde_json::from_str(r#"{"created_at": null}"#).unwrap(); + assert_eq!(w.created_at, None); + let w: Wrapper = serde_json::from_str(r#"{}"#).unwrap(); + assert_eq!(w.created_at, None); + + // A malformed string is rejected rather than silently dropped to None. + assert!(serde_json::from_str::(r#"{"created_at": "not-a-date"}"#).is_err()); + } + #[tokio::test] async fn test_list_versions() { let table = Table::new_with_handler("my_table", |request| {