From d6219d687cd386dc9bd168690ac0f971e7f06186 Mon Sep 17 00:00:00 2001 From: Will Jones Date: Thu, 5 Dec 2024 13:14:43 -0800 Subject: [PATCH] chore: simplify arrow json conversion (#1910) Taking care of a small TODO --- rust/lancedb/src/remote/table.rs | 15 +++++---------- rust/lancedb/src/utils.rs | 19 +++---------------- 2 files changed, 8 insertions(+), 26 deletions(-) diff --git a/rust/lancedb/src/remote/table.rs b/rust/lancedb/src/remote/table.rs index 256ea2d9..486912d8 100644 --- a/rust/lancedb/src/remote/table.rs +++ b/rust/lancedb/src/remote/table.rs @@ -9,7 +9,7 @@ use crate::utils::{supported_btree_data_type, supported_vector_data_type}; use crate::{Error, Table}; use arrow_array::RecordBatchReader; use arrow_ipc::reader::FileReader; -use arrow_schema::{DataType, Field as ArrowField, Schema as ArrowSchema, SchemaRef}; +use arrow_schema::{DataType, SchemaRef}; use async_trait::async_trait; use datafusion_common::DataFusionError; use datafusion_physical_plan::stream::RecordBatchStreamAdapter; @@ -17,7 +17,7 @@ use datafusion_physical_plan::{ExecutionPlan, SendableRecordBatchStream}; use futures::TryStreamExt; use http::header::CONTENT_TYPE; use http::StatusCode; -use lance::arrow::json::JsonSchema; +use lance::arrow::json::{JsonDataType, JsonSchema}; use lance::dataset::scanner::DatasetRecordBatchStream; use lance::dataset::{ColumnAlteration, NewColumnTransform, Version}; use lance_datafusion::exec::OneShotExec; @@ -687,14 +687,9 @@ impl TableInternal for RemoteTable { value["rename"] = serde_json::Value::String(rename.clone()); } if let Some(data_type) = &alteration.data_type { - // TODO: we can later simplify this substantially, after getting: - // https://github.com/lancedb/lance/pull/3161 - let dummy_schema = - ArrowSchema::new(vec![ArrowField::new("dummy", data_type.clone(), false)]); - let json_schema = JsonSchema::try_from(&dummy_schema).unwrap(); - let json_string = serde_json::to_string(&json_schema).unwrap(); - let json_value: serde_json::Value = serde_json::from_str(&json_string).unwrap(); - value["data_type"] = json_value["fields"][0]["type"].clone(); + let json_data_type = JsonDataType::try_from(data_type).unwrap(); + let json_data_type = serde_json::to_value(&json_data_type).unwrap(); + value["data_type"] = json_data_type; } if let Some(nullable) = &alteration.nullable { value["nullable"] = serde_json::Value::Bool(*nullable); diff --git a/rust/lancedb/src/utils.rs b/rust/lancedb/src/utils.rs index 3a4c3b91..a9f98ee9 100644 --- a/rust/lancedb/src/utils.rs +++ b/rust/lancedb/src/utils.rs @@ -15,7 +15,7 @@ use std::sync::Arc; use arrow_schema::{DataType, Schema}; -use lance::arrow::json::JsonSchema; +use lance::arrow::json::JsonDataType; use lance::dataset::{ReadParams, WriteParams}; use lance::io::{ObjectStoreParams, WrappingObjectStore}; use lazy_static::lazy_static; @@ -178,21 +178,8 @@ pub fn supported_vector_data_type(dtype: &DataType) -> bool { /// Note: this is temporary until we get a proper datatype conversion in Lance. pub fn string_to_datatype(s: &str) -> Option { - // TODO: we can later simplify this substantially, after getting: - // https://github.com/lancedb/lance/pull/3161 - let dummy_schema = format!( - "{{\"fields\": [\ - {{ \"name\": \"n\", \ - \"nullable\": true, \ - \"type\": {{\ - \"type\": \"{}\"\ - }} }}] }}", - s - ); - let json_schema: JsonSchema = serde_json::from_str(&dummy_schema).ok()?; - let schema = Schema::try_from(json_schema).ok()?; - let data_type = schema.field(0).data_type().clone(); - Some(data_type) + let json_type: JsonDataType = serde_json::from_str(s).ok()?; + (&json_type).try_into().ok() } #[cfg(test)]