Compare commits

..

3 Commits

Author     SHA1         Message                        Date
discord9   67a60646b4   chore: rm unwrap               2025-07-10 10:55:36 +08:00
                        Signed-off-by: discord9 <discord9@163.com>
discord9   1c3bde7e4e   docs: explain fast path        2025-07-10 10:48:38 +08:00
                        Signed-off-by: discord9 <discord9@163.com>
discord9   e045a0dbdf   refactor: faster window expr   2025-07-10 00:00:20 +08:00
                        Signed-off-by: discord9 <discord9@163.com>
106 changed files with 2827 additions and 3158 deletions

Cargo.lock (generated, 57 changed lines)
View File

@@ -2531,7 +2531,6 @@ dependencies = [
"tokio",
"tokio-postgres",
"tonic 0.12.3",
"tracing",
"typetag",
"uuid",
]
@@ -2996,9 +2995,9 @@ dependencies = [
[[package]]
name = "crc"
version = "3.3.0"
version = "3.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9710d3b3739c2e349eb44fe848ad0b7c8cb1e42bd87ee49371df2f7acaf3e675"
checksum = "69e6e4d7b33a94f0991c26729976b10ebde1d34c3ee82408fb536164fa10d636"
dependencies = [
"crc-catalog",
]
@@ -3807,7 +3806,6 @@ dependencies = [
"tokio",
"toml 0.8.19",
"tonic 0.12.3",
"tracing",
]
[[package]]
@@ -3830,7 +3828,7 @@ dependencies = [
"jsonb",
"num",
"num-traits",
"ordered-float 4.3.0",
"ordered-float 3.9.2",
"paste",
"serde",
"serde_json",
@@ -4151,16 +4149,12 @@ dependencies = [
[[package]]
name = "domain"
version = "0.11.0"
version = "0.10.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a11dd7f04a6a6d2aea0153c6e31f5ea7af8b2efdf52cdaeea7a9a592c7fefef9"
checksum = "4c84070523f8ba0f9127ff156920f27eb27b302b425efe60bf5f41ec244d1c60"
dependencies = [
"bumpalo",
"bytes",
"domain-macros",
"futures-util",
"hashbrown 0.14.5",
"log",
"moka",
"octseq",
"rand 0.8.5",
@@ -4171,17 +4165,6 @@ dependencies = [
"tracing",
]
[[package]]
name = "domain-macros"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0e197fdfd2cdb5fdeb7f8ddcf3aed5d5d04ecde2890d448b14ffb716f7376b70"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.100",
]
[[package]]
name = "dotenv"
version = "0.15.0"
@@ -4811,7 +4794,6 @@ dependencies = [
"toml 0.8.19",
"tonic 0.12.3",
"tower 0.5.2",
"tracing",
"uuid",
]
@@ -7315,7 +7297,6 @@ dependencies = [
"snafu 0.8.5",
"store-api",
"tokio",
"tracing",
]
[[package]]
@@ -7429,7 +7410,6 @@ dependencies = [
"datafusion-expr",
"datatypes",
"dotenv",
"either",
"futures",
"humantime-serde",
"index",
@@ -7465,7 +7445,6 @@ dependencies = [
"tokio-stream",
"tokio-util",
"toml 0.8.19",
"tracing",
"uuid",
]
@@ -8545,7 +8524,6 @@ dependencies = [
"tokio",
"tokio-util",
"tonic 0.12.3",
"tracing",
]
[[package]]
@@ -8582,6 +8560,17 @@ dependencies = [
"num-traits",
]
[[package]]
name = "ordered-float"
version = "3.9.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f1e1c390732d15f1d48471625cd92d154e66db2c56645e29a9cd26f4699f72dc"
dependencies = [
"num-traits",
"rand 0.8.5",
"serde",
]
[[package]]
name = "ordered-float"
version = "4.3.0"
@@ -8589,8 +8578,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "44d501f1a72f71d3c063a6bbc8f7271fa73aa09fe5d6283b6571e2ed176a2537"
dependencies = [
"num-traits",
"rand 0.8.5",
"serde",
]
[[package]]
@@ -9127,7 +9114,6 @@ dependencies = [
"moka",
"once_cell",
"operator",
"ordered-float 4.3.0",
"paste",
"prometheus",
"query",
@@ -9939,7 +9925,6 @@ dependencies = [
"table",
"tokio",
"tokio-stream",
"tracing",
"unescaper",
"uuid",
]
@@ -11373,10 +11358,8 @@ dependencies = [
"tonic-reflection",
"tower 0.5.2",
"tower-http 0.6.2",
"tracing",
"urlencoding",
"uuid",
"vrl",
"zstd 0.13.2",
]
@@ -13039,9 +13022,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
[[package]]
name = "tokio"
version = "1.45.1"
version = "1.44.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "75ef51a33ef1da925cea3e4eb122833cb377c61439ca401b770f54902b806779"
checksum = "e6b88822cbe49de4185e3a4cbf8321dd487cf5fe0c5c65695fef6346371e9c48"
dependencies = [
"backtrace",
"bytes",
@@ -13997,9 +13980,9 @@ dependencies = [
[[package]]
name = "vrl"
version = "0.25.0"
version = "0.24.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4f49394b948406ea1564aa00152e011d87a38ad35d277ebddda257a9ee39c419"
checksum = "f9ceadaa40aef567a26079ff014ca7a567ba85344f1b81090b5ec7d7bb16a219"
dependencies = [
"aes",
"aes-siv",

View File

@@ -167,7 +167,6 @@ opentelemetry-proto = { version = "0.27", features = [
"with-serde",
"logs",
] }
ordered-float = { version = "4.3", features = ["serde"] }
parking_lot = "0.12"
parquet = { version = "54.2", default-features = false, features = ["arrow", "async", "object_store"] }
paste = "1.0"
@@ -224,12 +223,10 @@ tokio-util = { version = "0.7", features = ["io-util", "compat"] }
toml = "0.8.8"
tonic = { version = "0.12", features = ["tls", "gzip", "zstd"] }
tower = "0.5"
tracing = "0.1"
tracing-appender = "0.2"
tracing-subscriber = { version = "0.3", features = ["env-filter", "json", "fmt"] }
typetag = "0.2"
uuid = { version = "1.7", features = ["serde", "v4", "fast-rng"] }
vrl = "0.25"
zstd = "0.13"
# DO_NOT_REMOVE_THIS: END_OF_EXTERNAL_DEPENDENCIES

scripts/install.sh (149 changed lines, mode: normal file → executable file)
View File

@@ -53,54 +53,6 @@ get_arch_type() {
esac
}
# Verify SHA256 checksum
verify_sha256() {
file="$1"
expected_sha256="$2"
if command -v sha256sum >/dev/null 2>&1; then
actual_sha256=$(sha256sum "$file" | cut -d' ' -f1)
elif command -v shasum >/dev/null 2>&1; then
actual_sha256=$(shasum -a 256 "$file" | cut -d' ' -f1)
else
echo "Warning: No SHA256 verification tool found (sha256sum or shasum). Skipping checksum verification."
return 0
fi
if [ "$actual_sha256" = "$expected_sha256" ]; then
echo "SHA256 checksum verified successfully."
return 0
else
echo "Error: SHA256 checksum verification failed!"
echo "Expected: $expected_sha256"
echo "Actual: $actual_sha256"
return 1
fi
}
# Prompt for user confirmation (compatible with different shells)
prompt_confirmation() {
message="$1"
printf "%s (y/N): " "$message"
# Try to read user input, fallback if read fails
answer=""
if read answer </dev/tty 2>/dev/null; then
case "$answer" in
[Yy]|[Yy][Ee][Ss])
return 0
;;
*)
return 1
;;
esac
else
echo ""
echo "Cannot read user input. Defaulting to No."
return 1
fi
}
download_artifact() {
if [ -n "${OS_TYPE}" ] && [ -n "${ARCH_TYPE}" ]; then
# Use the latest stable released version.
@@ -119,104 +71,17 @@ download_artifact() {
fi
echo "Downloading ${BIN}, OS: ${OS_TYPE}, Arch: ${ARCH_TYPE}, Version: ${VERSION}"
PKG_NAME="${BIN}-${OS_TYPE}-${ARCH_TYPE}-${VERSION}"
PACKAGE_NAME="${PKG_NAME}.tar.gz"
SHA256_FILE="${PKG_NAME}.sha256sum"
PACKAGE_NAME="${BIN}-${OS_TYPE}-${ARCH_TYPE}-${VERSION}.tar.gz"
if [ -n "${PACKAGE_NAME}" ]; then
# Check if files already exist and prompt for override
if [ -f "${PACKAGE_NAME}" ]; then
echo "File ${PACKAGE_NAME} already exists."
if prompt_confirmation "Do you want to override it?"; then
echo "Overriding existing file..."
rm -f "${PACKAGE_NAME}"
else
echo "Skipping download. Using existing file."
fi
fi
if [ -f "${BIN}" ]; then
echo "Binary ${BIN} already exists."
if prompt_confirmation "Do you want to override it?"; then
echo "Will override existing binary..."
rm -f "${BIN}"
else
echo "Installation cancelled."
exit 0
fi
fi
# Download package if not exists
if [ ! -f "${PACKAGE_NAME}" ]; then
echo "Downloading ${PACKAGE_NAME}..."
# Use curl instead of wget for better compatibility
if command -v curl >/dev/null 2>&1; then
if ! curl -L -o "${PACKAGE_NAME}" "https://github.com/${GITHUB_ORG}/${GITHUB_REPO}/releases/download/${VERSION}/${PACKAGE_NAME}"; then
echo "Error: Failed to download ${PACKAGE_NAME}"
exit 1
fi
elif command -v wget >/dev/null 2>&1; then
if ! wget -O "${PACKAGE_NAME}" "https://github.com/${GITHUB_ORG}/${GITHUB_REPO}/releases/download/${VERSION}/${PACKAGE_NAME}"; then
echo "Error: Failed to download ${PACKAGE_NAME}"
exit 1
fi
else
echo "Error: Neither curl nor wget is available for downloading."
exit 1
fi
fi
# Download and verify SHA256 checksum
echo "Downloading SHA256 checksum..."
sha256_download_success=0
if command -v curl >/dev/null 2>&1; then
if curl -L -s -o "${SHA256_FILE}" "https://github.com/${GITHUB_ORG}/${GITHUB_REPO}/releases/download/${VERSION}/${SHA256_FILE}" 2>/dev/null; then
sha256_download_success=1
fi
elif command -v wget >/dev/null 2>&1; then
if wget -q -O "${SHA256_FILE}" "https://github.com/${GITHUB_ORG}/${GITHUB_REPO}/releases/download/${VERSION}/${SHA256_FILE}" 2>/dev/null; then
sha256_download_success=1
fi
fi
if [ $sha256_download_success -eq 1 ] && [ -f "${SHA256_FILE}" ]; then
expected_sha256=$(cat "${SHA256_FILE}" | cut -d' ' -f1)
if [ -n "$expected_sha256" ]; then
if ! verify_sha256 "${PACKAGE_NAME}" "${expected_sha256}"; then
echo "SHA256 verification failed. Removing downloaded file."
rm -f "${PACKAGE_NAME}" "${SHA256_FILE}"
exit 1
fi
else
echo "Warning: Could not parse SHA256 checksum from file."
fi
rm -f "${SHA256_FILE}"
else
echo "Warning: Could not download SHA256 checksum file. Skipping verification."
fi
wget "https://github.com/${GITHUB_ORG}/${GITHUB_REPO}/releases/download/${VERSION}/${PACKAGE_NAME}"
# Extract the binary and clean the rest.
echo "Extracting ${PACKAGE_NAME}..."
if ! tar xf "${PACKAGE_NAME}"; then
echo "Error: Failed to extract ${PACKAGE_NAME}"
exit 1
fi
# Find the binary in the extracted directory
extracted_dir="${PACKAGE_NAME%.tar.gz}"
if [ -f "${extracted_dir}/${BIN}" ]; then
mv "${extracted_dir}/${BIN}" "${PWD}/"
rm -f "${PACKAGE_NAME}"
rm -rf "${extracted_dir}"
chmod +x "${BIN}"
echo "Installation completed successfully!"
echo "Run './${BIN} --help' to get started"
else
echo "Error: Binary ${BIN} not found in extracted archive"
rm -f "${PACKAGE_NAME}"
rm -rf "${extracted_dir}"
exit 1
fi
tar xvf "${PACKAGE_NAME}" && \
mv "${PACKAGE_NAME%.tar.gz}/${BIN}" "${PWD}" && \
rm -r "${PACKAGE_NAME}" && \
rm -r "${PACKAGE_NAME%.tar.gz}" && \
echo "Run './${BIN} --help' to get started"
fi
fi
}

View File

@@ -169,7 +169,7 @@ impl DfPartitionStream for PGClass {
}
/// Builds the `pg_catalog.pg_class` table row by row
/// TODO(J0HN50N133): `relowner` is always the [`DUMMY_OWNER_ID`] because we don't have users.
/// TODO(J0HN50N133): `relowner` is always the [`DUMMY_OWNER_ID`] cuz we don't have user.
/// Once we have user system, make it the actual owner of the table.
struct PGClassBuilder {
schema: SchemaRef,

View File

@@ -23,7 +23,7 @@ use api::v1::greptime_request::Request;
use api::v1::query_request::Query;
use api::v1::{
AlterTableExpr, AuthHeader, Basic, CreateTableExpr, DdlRequest, GreptimeRequest,
InsertRequests, QueryRequest, RequestHeader, RowInsertRequests,
InsertRequests, QueryRequest, RequestHeader,
};
use arrow_flight::{FlightData, Ticket};
use async_stream::stream;
@@ -118,7 +118,6 @@ impl Database {
}
}
/// Set the catalog for the database client.
pub fn set_catalog(&mut self, catalog: impl Into<String>) {
self.catalog = catalog.into();
}
@@ -131,7 +130,6 @@ impl Database {
}
}
/// Set the schema for the database client.
pub fn set_schema(&mut self, schema: impl Into<String>) {
self.schema = schema.into();
}
@@ -144,24 +142,20 @@ impl Database {
}
}
/// Set the timezone for the database client.
pub fn set_timezone(&mut self, timezone: impl Into<String>) {
self.timezone = timezone.into();
}
/// Set the auth scheme for the database client.
pub fn set_auth(&mut self, auth: AuthScheme) {
self.ctx.auth_header = Some(AuthHeader {
auth_scheme: Some(auth),
});
}
/// Make an InsertRequests request to the database.
pub async fn insert(&self, requests: InsertRequests) -> Result<u32> {
self.handle(Request::Inserts(requests)).await
}
/// Make an InsertRequests request to the database with hints.
pub async fn insert_with_hints(
&self,
requests: InsertRequests,
@@ -178,28 +172,6 @@ impl Database {
from_grpc_response(response)
}
/// Make a RowInsertRequests request to the database.
pub async fn row_inserts(&self, requests: RowInsertRequests) -> Result<u32> {
self.handle(Request::RowInserts(requests)).await
}
/// Make a RowInsertRequests request to the database with hints.
pub async fn row_inserts_with_hints(
&self,
requests: RowInsertRequests,
hints: &[(&str, &str)],
) -> Result<u32> {
let mut client = make_database_client(&self.client)?.inner;
let request = self.to_rpc_request(Request::RowInserts(requests));
let mut request = tonic::Request::new(request);
let metadata = request.metadata_mut();
Self::put_hints(metadata, hints)?;
let response = client.handle(request).await?.into_inner();
from_grpc_response(response)
}
fn put_hints(metadata: &mut MetadataMap, hints: &[(&str, &str)]) -> Result<()> {
let Some(value) = hints
.iter()
@@ -215,7 +187,6 @@ impl Database {
Ok(())
}
/// Make a request to the database.
pub async fn handle(&self, request: Request) -> Result<u32> {
let mut client = make_database_client(&self.client)?.inner;
let request = self.to_rpc_request(request);
@@ -279,7 +250,6 @@ impl Database {
}
}
/// Executes a SQL query without any hints.
pub async fn sql<S>(&self, sql: S) -> Result<Output>
where
S: AsRef<str>,
@@ -287,7 +257,6 @@ impl Database {
self.sql_with_hint(sql, &[]).await
}
/// Executes a SQL query with optional hints for query optimization.
pub async fn sql_with_hint<S>(&self, sql: S, hints: &[(&str, &str)]) -> Result<Output>
where
S: AsRef<str>,
@@ -298,7 +267,6 @@ impl Database {
self.do_get(request, hints).await
}
/// Executes a logical plan directly without SQL parsing.
pub async fn logical_plan(&self, logical_plan: Vec<u8>) -> Result<Output> {
let request = Request::Query(QueryRequest {
query: Some(Query::LogicalPlan(logical_plan)),
@@ -306,7 +274,6 @@ impl Database {
self.do_get(request, &[]).await
}
/// Creates a new table using the provided table expression.
pub async fn create(&self, expr: CreateTableExpr) -> Result<Output> {
let request = Request::Ddl(DdlRequest {
expr: Some(DdlExpr::CreateTable(expr)),
@@ -314,7 +281,6 @@ impl Database {
self.do_get(request, &[]).await
}
/// Alters an existing table using the provided alter expression.
pub async fn alter(&self, expr: AlterTableExpr) -> Result<Output> {
let request = Request::Ddl(DdlRequest {
expr: Some(DdlExpr::AlterTable(expr)),

View File

@@ -69,7 +69,6 @@ table = { workspace = true, features = ["testing"] }
tokio.workspace = true
tokio-postgres = { workspace = true, optional = true }
tonic.workspace = true
tracing.workspace = true
typetag.workspace = true
[dev-dependencies]

View File

@@ -20,8 +20,8 @@ use api::v1::region::{alter_request, AlterRequest, RegionRequest, RegionRequestH
use api::v1::AlterTableExpr;
use common_catalog::format_full_table_name;
use common_grpc_expr::alter_expr_to_request;
use common_telemetry::debug;
use common_telemetry::tracing_context::TracingContext;
use common_telemetry::{debug, info};
use futures::future;
use snafu::{ensure, ResultExt};
use store_api::metadata::ColumnMetadata;
@@ -304,10 +304,5 @@ fn build_new_table_info(
| AlterKind::DropDefaults { .. } => {}
}
info!(
"Built new table info: {:?} for table {}, table_id: {}",
new_info.meta, table_name, table_id
);
Ok(new_info)
}

View File

@@ -21,7 +21,7 @@ use crate::key::table_name::TableNameKey;
impl CreateFlowProcedure {
/// Allocates the [FlowId].
pub(crate) async fn allocate_flow_id(&mut self) -> Result<()> {
// TODO(weny, ruihang): We don't support the partitions. It's always be 1, now.
//TODO(weny, ruihang): We doesn't support the partitions. It's always be 1, now.
let partitions = 1;
let (flow_id, peers) = self
.context

View File

@@ -113,19 +113,15 @@ impl TableMetadataAllocator {
table_id: TableId,
task: &CreateTableTask,
) -> Result<PhysicalTableRouteValue> {
let num_regions = task
.partitions
.as_ref()
.map(|p| p.value_list.len())
.unwrap_or(1);
let regions = task.partitions.len();
ensure!(
num_regions > 0,
regions > 0,
error::UnexpectedSnafu {
err_msg: "The number of partitions must be greater than 0"
}
);
let peers = self.peer_allocator.alloc(num_regions).await?;
let peers = self.peer_allocator.alloc(regions).await?;
debug!("Allocated peers {:?} for table {}", peers, table_id);
let region_routes = task
.partitions

View File

@@ -21,6 +21,7 @@ pub mod flownode_handler;
use std::assert_matches::assert_matches;
use std::collections::HashMap;
use api::v1::meta::Partition;
use api::v1::{ColumnDataType, SemanticType};
use common_procedure::Status;
use datatypes::prelude::ConcreteDataType;
@@ -144,7 +145,10 @@ pub fn test_create_logical_table_task(name: &str) -> CreateTableTask {
CreateTableTask {
create_table,
// Single region
partitions: None,
partitions: vec![Partition {
column_list: vec![],
value_list: vec![],
}],
table_info,
}
}
@@ -179,7 +183,10 @@ pub fn test_create_physical_table_task(name: &str) -> CreateTableTask {
CreateTableTask {
create_table,
// Single region
partitions: None,
partitions: vec![Partition {
column_list: vec![],
value_list: vec![],
}],
table_info,
}
}

View File

@@ -15,6 +15,7 @@
use std::collections::HashMap;
use api::v1::column_def::try_as_column_schema;
use api::v1::meta::Partition;
use api::v1::{ColumnDataType, ColumnDef, CreateTableExpr, SemanticType};
use chrono::DateTime;
use common_catalog::consts::{
@@ -174,7 +175,10 @@ pub fn test_create_table_task(name: &str, table_id: TableId) -> CreateTableTask
CreateTableTask {
create_table,
// Single region
partitions: None,
partitions: vec![Partition {
column_list: vec![],
value_list: vec![],
}],
table_info,
}
}

View File

@@ -17,7 +17,7 @@ use std::collections::HashMap;
use std::sync::Arc;
use api::region::RegionResponse;
use api::v1::meta::Peer;
use api::v1::meta::{Partition, Peer};
use api::v1::region::{region_request, RegionRequest};
use api::v1::{ColumnDataType, SemanticType};
use common_error::ext::ErrorExt;
@@ -141,7 +141,10 @@ pub(crate) fn test_create_table_task(name: &str) -> CreateTableTask {
CreateTableTask {
create_table,
// Single region
partitions: None,
partitions: vec![Partition {
column_list: vec![],
value_list: vec![],
}],
table_info,
}
}
@@ -215,7 +218,7 @@ async fn test_on_prepare_with_no_partition_err() {
let node_manager = Arc::new(MockDatanodeManager::new(()));
let ddl_context = new_ddl_context(node_manager);
let mut task = test_create_table_task("foo");
task.partitions = None;
task.partitions = vec![];
task.create_table.create_if_not_exists = true;
let mut procedure = CreateTableProcedure::new(task, ddl_context);
let err = procedure.on_prepare().await.unwrap_err();

View File

@@ -19,17 +19,11 @@ pub use api::v1::meta::Peer;
use crate::error::Error;
use crate::{DatanodeId, FlownodeId};
/// PeerLookupService is a service that can lookup peers.
/// can query peer given a node id
#[async_trait::async_trait]
pub trait PeerLookupService {
/// Returns the datanode with the given id. It may return inactive peers.
async fn datanode(&self, id: DatanodeId) -> Result<Option<Peer>, Error>;
/// Returns the flownode with the given id. It may return inactive peers.
async fn flownode(&self, id: FlownodeId) -> Result<Option<Peer>, Error>;
/// Returns all currently active frontend nodes that have reported a heartbeat within the most recent heartbeat interval from the in-memory backend.
async fn active_frontends(&self) -> Result<Vec<Peer>, Error>;
}
pub type PeerLookupServiceRef = Arc<dyn PeerLookupService + Send + Sync>;

View File

@@ -96,7 +96,7 @@ impl DdlTask {
/// Creates a [`DdlTask`] to create a table.
pub fn new_create_table(
expr: CreateTableExpr,
partitions: Option<Partition>,
partitions: Vec<Partition>,
table_info: RawTableInfo,
) -> Self {
DdlTask::CreateTable(CreateTableTask::new(expr, partitions, table_info))
@@ -107,7 +107,7 @@ impl DdlTask {
DdlTask::CreateLogicalTables(
table_data
.into_iter()
.map(|(expr, table_info)| CreateTableTask::new(expr, None, table_info))
.map(|(expr, table_info)| CreateTableTask::new(expr, Vec::new(), table_info))
.collect(),
)
}
@@ -606,10 +606,7 @@ impl From<DropTableTask> for PbDropTableTask {
#[derive(Debug, PartialEq, Clone)]
pub struct CreateTableTask {
pub create_table: CreateTableExpr,
/// The partitions of the table.
///
/// If the table is created with a single region (not partitioned), this field is `None`.
pub partitions: Option<Partition>,
pub partitions: Vec<Partition>,
pub table_info: RawTableInfo,
}
@@ -623,7 +620,7 @@ impl TryFrom<PbCreateTableTask> for CreateTableTask {
pb.create_table.context(error::InvalidProtoMsgSnafu {
err_msg: "expected create table",
})?,
pb.partitions.first().cloned(),
pb.partitions,
table_info,
))
}
@@ -636,10 +633,7 @@ impl TryFrom<CreateTableTask> for PbCreateTableTask {
Ok(PbCreateTableTask {
table_info: serde_json::to_vec(&task.table_info).context(error::SerdeJsonSnafu)?,
create_table: Some(task.create_table),
partitions: match task.partitions {
Some(p) => vec![p],
None => vec![],
},
partitions: task.partitions,
})
}
}
@@ -647,7 +641,7 @@ impl TryFrom<CreateTableTask> for PbCreateTableTask {
impl CreateTableTask {
pub fn new(
expr: CreateTableExpr,
partitions: Option<Partition>,
partitions: Vec<Partition>,
table_info: RawTableInfo,
) -> CreateTableTask {
CreateTableTask {
@@ -707,10 +701,7 @@ impl Serialize for CreateTableTask {
let pb = PbCreateTableTask {
create_table: Some(self.create_table.clone()),
partitions: match &self.partitions {
Some(p) => vec![p.clone()],
None => vec![],
},
partitions: self.partitions.clone(),
table_info,
};
let buf = pb.encode_to_vec();
@@ -1324,7 +1315,7 @@ mod tests {
let table_info = test_table_info(1025, "foo", "bar", "baz", Arc::new(schema));
let task = CreateTableTask::new(
CreateTableExpr::default(),
None,
Vec::new(),
RawTableInfo::from(table_info),
);
@@ -1420,7 +1411,8 @@ mod tests {
..Default::default()
};
let mut create_table_task = CreateTableTask::new(create_table_expr, None, raw_table_info);
let mut create_table_task =
CreateTableTask::new(create_table_expr, Vec::new(), raw_table_info);
// Call the sort_columns method
create_table_task.sort_columns();

View File

@@ -391,9 +391,6 @@ impl From<Region> for PbRegion {
}
}
/// Serialized version of `PartitionDef`.
///
/// Represent the entire partition part of one table
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
pub struct Partition {
#[serde(serialize_with = "as_utf8_vec", deserialize_with = "from_utf8_vec")]

View File

@@ -213,10 +213,6 @@ impl PeerLookupService for NoopPeerLookupService {
async fn flownode(&self, id: FlownodeId) -> Result<Option<Peer>> {
Ok(Some(Peer::empty(id)))
}
async fn active_frontends(&self) -> Result<Vec<Peer>> {
Ok(vec![])
}
}
/// Create a kafka topic pool for testing.

View File

@@ -56,18 +56,8 @@ macro_rules! parse_number_to_value {
},
)+
ConcreteDataType::Timestamp(t) => {
let n = parse_sql_number::<i64>($n)?;
let timestamp = Timestamp::new(n, t.unit());
// Check if the value is within the valid range for the target unit
if Timestamp::is_overflow(n, t.unit()) {
return TimestampOverflowSnafu {
timestamp,
target_unit: t.unit(),
}.fail();
}
Ok(Value::Timestamp(timestamp))
let n = parse_sql_number::<i64>($n)?;
Ok(Value::Timestamp(Timestamp::new(n, t.unit())))
},
// TODO(QuenKar): This could need to be optimized
// if this from_str function is slow,
@@ -372,7 +362,6 @@ pub(crate) fn parse_hex_string(s: &str) -> Result<Value> {
mod test {
use common_base::bytes::Bytes;
use common_time::timestamp::TimeUnit;
use datatypes::types::TimestampType;
use datatypes::value::OrderedFloat;
use super::*;
@@ -1092,89 +1081,4 @@ mod test {
);
assert!(v.is_ok());
}
#[test]
fn test_sql_number_to_value_timestamp_strict_typing() {
// Test that values are interpreted according to the target column type
let timestamp_type = TimestampType::Millisecond(datatypes::types::TimestampMillisecondType);
let data_type = ConcreteDataType::Timestamp(timestamp_type);
// Valid millisecond timestamp
let millisecond_str = "1747814093865";
let result = sql_number_to_value(&data_type, millisecond_str).unwrap();
if let Value::Timestamp(ts) = result {
assert_eq!(ts.unit(), TimeUnit::Millisecond);
assert_eq!(ts.value(), 1747814093865);
} else {
panic!("Expected timestamp value");
}
// Large value that would overflow when treated as milliseconds should be rejected
let nanosecond_str = "1747814093865000000"; // This is too large for millisecond precision
let result = sql_number_to_value(&data_type, nanosecond_str);
assert!(
result.is_err(),
"Should reject overly large timestamp values"
);
}
#[test]
fn test_sql_number_to_value_timestamp_different_units() {
// Test second precision
let second_type = TimestampType::Second(datatypes::types::TimestampSecondType);
let second_data_type = ConcreteDataType::Timestamp(second_type);
let second_str = "1747814093";
let result = sql_number_to_value(&second_data_type, second_str).unwrap();
if let Value::Timestamp(ts) = result {
assert_eq!(ts.unit(), TimeUnit::Second);
assert_eq!(ts.value(), 1747814093);
} else {
panic!("Expected timestamp value");
}
// Test nanosecond precision
let nanosecond_type = TimestampType::Nanosecond(datatypes::types::TimestampNanosecondType);
let nanosecond_data_type = ConcreteDataType::Timestamp(nanosecond_type);
let nanosecond_str = "1747814093865000000";
let result = sql_number_to_value(&nanosecond_data_type, nanosecond_str).unwrap();
if let Value::Timestamp(ts) = result {
assert_eq!(ts.unit(), TimeUnit::Nanosecond);
assert_eq!(ts.value(), 1747814093865000000);
} else {
panic!("Expected timestamp value");
}
}
#[test]
fn test_timestamp_range_validation() {
// Test that our range checking works correctly
let nanosecond_value = 1747814093865000000i64; // This should be too large for millisecond
// This should work for nanosecond precision
let nanosecond_type = TimestampType::Nanosecond(datatypes::types::TimestampNanosecondType);
let nanosecond_data_type = ConcreteDataType::Timestamp(nanosecond_type);
let result = sql_number_to_value(&nanosecond_data_type, "1747814093865000000");
assert!(
result.is_ok(),
"Nanosecond value should be valid for nanosecond column"
);
// This should fail for millisecond precision (value too large)
let millisecond_type =
TimestampType::Millisecond(datatypes::types::TimestampMillisecondType);
let millisecond_data_type = ConcreteDataType::Timestamp(millisecond_type);
let result = sql_number_to_value(&millisecond_data_type, "1747814093865000000");
assert!(
result.is_err(),
"Nanosecond value should be rejected for millisecond column"
);
// Verify the ranges work as expected
assert!(
nanosecond_value > Timestamp::MAX_MILLISECOND.value(),
"Test value should exceed millisecond range"
);
}
}
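
A minimal, self-contained sketch (not from this repository) of the strict-typing behavior exercised above: a numeric SQL literal is range-checked against the target column's time unit before a timestamp value is produced. It assumes only the common_time API visible in this diff (Timestamp::new, Timestamp::is_overflow, Timestamp::unit, TimeUnit); the literals reuse the ones from the tests.

use common_time::timestamp::TimeUnit;
use common_time::Timestamp;

// Mirrors the fast-fail branch of parse_number_to_value!: build the timestamp,
// then reject it if the raw value cannot fit the column's time unit.
fn parse_literal(n: i64, unit: TimeUnit) -> Result<Timestamp, String> {
    let ts = Timestamp::new(n, unit);
    if Timestamp::is_overflow(n, ts.unit()) {
        Err(format!("{n} overflows the target time unit"))
    } else {
        Ok(ts)
    }
}

fn main() {
    // A plain millisecond literal fits a millisecond column.
    assert!(parse_literal(1_747_814_093_865, TimeUnit::Millisecond).is_ok());
    // A nanosecond-scale literal is rejected for a millisecond column...
    assert!(parse_literal(1_747_814_093_865_000_000, TimeUnit::Millisecond).is_err());
    // ...but accepted for a nanosecond column.
    assert!(parse_literal(1_747_814_093_865_000_000, TimeUnit::Nanosecond).is_ok());
}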

View File

@@ -498,17 +498,6 @@ impl Timestamp {
pub const MIN_NANOSECOND: Self = Self::new_nanosecond(i64::MIN);
pub const MAX_NANOSECOND: Self = Self::new_nanosecond(i64::MAX);
/// Checks if a value would overflow for the given time unit.
pub fn is_overflow(value: i64, unit: TimeUnit) -> bool {
let (min_val, max_val) = match unit {
TimeUnit::Second => (Self::MIN_SECOND.value(), Self::MAX_SECOND.value()),
TimeUnit::Millisecond => (Self::MIN_MILLISECOND.value(), Self::MAX_MILLISECOND.value()),
TimeUnit::Microsecond => (Self::MIN_MICROSECOND.value(), Self::MAX_MICROSECOND.value()),
TimeUnit::Nanosecond => (Self::MIN_NANOSECOND.value(), Self::MAX_NANOSECOND.value()),
};
value < min_val || value > max_val
}
}
/// Converts the naive datetime (which has no specific timezone) to a

View File

@@ -66,7 +66,6 @@ table.workspace = true
tokio.workspace = true
toml.workspace = true
tonic.workspace = true
tracing.workspace = true
[dev-dependencies]
cache.workspace = true

View File

@@ -424,15 +424,7 @@ impl CountdownTask {
},
Some(CountdownCommand::Reset((role, deadline, extension_info))) => {
if let Err(err) = self.region_server.set_region_role(self.region_id, role) {
if err.status_code() == StatusCode::RegionNotFound {
// Table metadata in metasrv is deleted after its regions are dropped.
// The datanode may still receive lease renewal responses that depend on the metadata
// during the short period before it is removed.
warn!(err; "Failed to set region role to {role} for region {region_id}");
}else{
error!(err; "Failed to set region role to {role} for region {region_id}");
}
error!(err; "Failed to set region role to {role} for region {region_id}");
}
if let Some(ext_handler) = self.handler_ext.as_ref() {
ext_handler.reset_deadline(

View File

@@ -27,14 +27,14 @@ lazy_static! {
pub static ref HANDLE_REGION_REQUEST_ELAPSED: HistogramVec = register_histogram_vec!(
"greptime_datanode_handle_region_request_elapsed",
"datanode handle region request elapsed",
&[REGION_REQUEST_TYPE]
&[REGION_ID, REGION_REQUEST_TYPE]
)
.unwrap();
/// The number of rows in region request received by region server, labeled with request type.
pub static ref REGION_CHANGED_ROW_COUNT: IntCounterVec = register_int_counter_vec!(
"greptime_datanode_region_changed_row_count",
"datanode region changed row count",
&[REGION_REQUEST_TYPE]
&[REGION_ID, REGION_REQUEST_TYPE]
)
.unwrap();
/// The elapsed time since the last received heartbeat.

View File

@@ -968,8 +968,9 @@ impl RegionServerInner {
request: RegionRequest,
) -> Result<RegionResponse> {
let request_type = request.request_type();
let region_id_str = region_id.to_string();
let _timer = crate::metrics::HANDLE_REGION_REQUEST_ELAPSED
.with_label_values(&[request_type])
.with_label_values(&[&region_id_str, request_type])
.start_timer();
let region_change = match &request {
@@ -1009,7 +1010,7 @@ impl RegionServerInner {
// Update metrics
if matches!(region_change, RegionChange::Ingest) {
crate::metrics::REGION_CHANGED_ROW_COUNT
.with_label_values(&[request_type])
.with_label_values(&[&region_id_str, request_type])
.inc_by(result.affected_rows as u64);
}
// Sets corresponding region status to ready.

View File

@@ -28,7 +28,7 @@ greptime-proto.workspace = true
jsonb.workspace = true
num = "0.4"
num-traits = "0.2"
ordered-float.workspace = true
ordered-float = { version = "3.0", features = ["serde"] }
paste.workspace = true
serde.workspace = true
serde_json.workspace = true

View File

@@ -497,7 +497,7 @@ impl StreamingEngine {
&self,
schema: &RelationDesc,
) -> Result<(Vec<String>, Vec<ColumnSchema>, bool), Error> {
// TODO(discord9): consider remove buggy auto create by schema
// TODO(discord9): condiser remove buggy auto create by schema
// TODO(discord9): use default key from schema
let primary_keys = schema

View File

@@ -74,7 +74,6 @@ tokio.workspace = true
tokio-util.workspace = true
toml.workspace = true
tonic.workspace = true
tracing.workspace = true
[dev-dependencies]
catalog = { workspace = true, features = ["testing"] }

View File

@@ -20,11 +20,8 @@ use std::task::{Context, Poll};
use api::v1::meta::heartbeat_request::NodeWorkloads;
use common_error::ext::BoxedError;
use common_meta::cluster::{NodeInfo, NodeInfoKey, Role as ClusterRole};
use common_meta::distributed_time_constants::FRONTEND_HEARTBEAT_INTERVAL_MILLIS;
use common_meta::kv_backend::{KvBackend, ResettableKvBackendRef};
use common_meta::peer::{Peer, PeerLookupService};
use common_meta::rpc::store::RangeRequest;
use common_meta::{util, DatanodeId, FlownodeId};
use common_time::util as time_util;
use common_workload::DatanodeWorkloadType;
@@ -34,19 +31,10 @@ use crate::cluster::MetaPeerClientRef;
use crate::error::{Error, KvBackendSnafu, Result};
use crate::key::{DatanodeLeaseKey, FlownodeLeaseKey, LeaseValue};
enum Value<'a> {
LeaseValue(&'a LeaseValue),
NodeInfo(&'a NodeInfo),
}
fn build_lease_filter(lease_secs: u64) -> impl Fn(Value) -> bool {
move |value: Value| {
let active_time = match value {
Value::LeaseValue(lease_value) => lease_value.timestamp_millis,
Value::NodeInfo(node_info) => node_info.last_activity_ts,
};
((time_util::current_time_millis() - active_time) as u64) < lease_secs.saturating_mul(1000)
fn build_lease_filter(lease_secs: u64) -> impl Fn(&LeaseValue) -> bool {
move |v: &LeaseValue| {
((time_util::current_time_millis() - v.timestamp_millis) as u64)
< lease_secs.saturating_mul(1000)
}
}
@@ -103,7 +91,7 @@ pub async fn lookup_datanode_peer(
return Ok(None);
};
let lease_value: LeaseValue = kv.value.try_into()?;
let is_alive = lease_filter(Value::LeaseValue(&lease_value));
let is_alive = lease_filter(&lease_value);
if is_alive {
Ok(Some(Peer {
id: lease_key.node_id,
@@ -167,7 +155,7 @@ where
let condition = this.condition;
let key_prefix = std::mem::take(&mut this.key_prefix);
let fut = filter(key_prefix, this.meta_peer_client, move |v| {
lease_filter(Value::LeaseValue(v)) && condition.unwrap_or(|_| true)(v)
lease_filter(v) && condition.unwrap_or(|_| true)(v)
});
this.inner_future = Some(Box::pin(fut));
@@ -204,7 +192,7 @@ pub async fn lookup_flownode_peer(
};
let lease_value: LeaseValue = kv.value.try_into()?;
let is_alive = lease_filter(Value::LeaseValue(&lease_value));
let is_alive = lease_filter(&lease_value);
if is_alive {
Ok(Some(Peer {
id: lease_key.node_id,
@@ -215,29 +203,6 @@ pub async fn lookup_flownode_peer(
}
}
/// Lookup all alive frontends from the memory backend, only return if it's alive under given `lease_secs`.
pub async fn lookup_frontends(
meta_peer_client: &MetaPeerClientRef,
lease_secs: u64,
) -> Result<Vec<Peer>> {
let range_request =
RangeRequest::new().with_prefix(NodeInfoKey::key_prefix_with_role(ClusterRole::Frontend));
let response = meta_peer_client.range(range_request).await?;
let lease_filter = build_lease_filter(lease_secs);
let mut peers = Vec::with_capacity(response.kvs.len());
for kv in response.kvs {
let node_info = NodeInfo::try_from(kv.value).context(KvBackendSnafu)?;
let is_alive = lease_filter(Value::NodeInfo(&node_info));
if is_alive {
peers.push(node_info.peer);
}
}
Ok(peers)
}
/// Find all alive flownodes
pub fn alive_flownodes(
meta_peer_client: &MetaPeerClientRef,
@@ -299,42 +264,25 @@ impl PeerLookupService for MetaPeerLookupService {
.map_err(BoxedError::new)
.context(common_meta::error::ExternalSnafu)
}
async fn flownode(&self, id: FlownodeId) -> common_meta::error::Result<Option<Peer>> {
lookup_flownode_peer(id, &self.meta_peer_client, u64::MAX)
.await
.map_err(BoxedError::new)
.context(common_meta::error::ExternalSnafu)
}
async fn active_frontends(&self) -> common_meta::error::Result<Vec<Peer>> {
// Get the active frontends within the last heartbeat interval.
lookup_frontends(
&self.meta_peer_client,
// TODO(zyy17): How to get the heartbeat interval of the frontend if it uses a custom heartbeat interval?
FRONTEND_HEARTBEAT_INTERVAL_MILLIS,
)
.await
.map_err(BoxedError::new)
.context(common_meta::error::ExternalSnafu)
}
}
#[cfg(test)]
mod tests {
use api::v1::meta::heartbeat_request::NodeWorkloads;
use api::v1::meta::DatanodeWorkloads;
use common_meta::cluster::{FrontendStatus, NodeInfo, NodeInfoKey, NodeStatus};
use common_meta::kv_backend::ResettableKvBackendRef;
use common_meta::peer::Peer;
use common_meta::rpc::store::PutRequest;
use common_time::util::current_time_millis;
use common_workload::DatanodeWorkloadType;
use crate::key::{DatanodeLeaseKey, LeaseValue};
use crate::lease::{
alive_datanodes, is_datanode_accept_ingest_workload, lookup_frontends, ClusterRole,
};
use crate::lease::{alive_datanodes, is_datanode_accept_ingest_workload};
use crate::test_util::create_meta_peer_client;
async fn put_lease_value(
@@ -443,60 +391,4 @@ mod tests {
assert_eq!(leases.len(), 1);
assert!(leases.contains_key(&DatanodeLeaseKey { node_id: 2 }));
}
#[tokio::test]
async fn test_lookup_frontends() {
let client = create_meta_peer_client();
let in_memory = client.memory_backend();
let lease_secs = 10;
let active_frontend_node = NodeInfo {
peer: Peer {
id: 0,
addr: "127.0.0.1:20201".to_string(),
},
last_activity_ts: current_time_millis(),
status: NodeStatus::Frontend(FrontendStatus {}),
version: "1.0.0".to_string(),
git_commit: "1234567890".to_string(),
start_time_ms: current_time_millis() as u64,
};
let key_prefix = NodeInfoKey::key_prefix_with_role(ClusterRole::Frontend);
in_memory
.put(PutRequest {
key: format!("{}{}", key_prefix, "0").into(),
value: active_frontend_node.try_into().unwrap(),
prev_kv: false,
})
.await
.unwrap();
let inactive_frontend_node = NodeInfo {
peer: Peer {
id: 1,
addr: "127.0.0.1:20201".to_string(),
},
last_activity_ts: current_time_millis() - 20 * 1000,
status: NodeStatus::Frontend(FrontendStatus {}),
version: "1.0.0".to_string(),
git_commit: "1234567890".to_string(),
start_time_ms: current_time_millis() as u64,
};
in_memory
.put(PutRequest {
key: format!("{}{}", key_prefix, "1").into(),
value: inactive_frontend_node.try_into().unwrap(),
prev_kv: false,
})
.await
.unwrap();
let peers = lookup_frontends(&client, lease_secs as u64).await.unwrap();
assert_eq!(peers.len(), 1);
assert_eq!(peers[0].id, 0);
}
}
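
For orientation, a runnable sketch of the lease filter shown at the top of this file section. The LeaseValue stand-in below is hypothetical (only the timestamp_millis field used by the filter is modeled), while current_time_millis comes from the common_time crate seen elsewhere in this diff.

use common_time::util::current_time_millis;

// Hypothetical stand-in for meta-srv's LeaseValue; the real type has more fields.
struct LeaseValue {
    timestamp_millis: i64,
}

// Same shape as build_lease_filter above: a node counts as alive if its last
// heartbeat is younger than lease_secs.
fn build_lease_filter(lease_secs: u64) -> impl Fn(&LeaseValue) -> bool {
    move |v: &LeaseValue| {
        ((current_time_millis() - v.timestamp_millis) as u64) < lease_secs.saturating_mul(1000)
    }
}

fn main() {
    let is_alive = build_lease_filter(10);
    let fresh = LeaseValue { timestamp_millis: current_time_millis() };
    let stale = LeaseValue { timestamp_millis: current_time_millis() - 20 * 1000 };
    assert!(is_alive(&fresh));  // heartbeat just now: kept
    assert!(!is_alive(&stale)); // 20 s old against a 10 s lease: filtered out
}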

View File

@@ -15,6 +15,7 @@
use std::collections::{HashMap, HashSet};
use std::sync::{Arc, Mutex};
use api::v1::meta::Partition;
use api::v1::region::region_request::Body as PbRegionRequest;
use api::v1::region::{CreateRequest as PbCreateRegionRequest, RegionColumnDef};
use api::v1::{ColumnDataType, ColumnDef as PbColumnDef, SemanticType};
@@ -83,7 +84,14 @@ fn create_table_task(table_name: Option<&str>) -> CreateTableTask {
.into();
let table_info = build_raw_table_info_from_expr(&expr);
CreateTableTask::new(expr, None, table_info)
CreateTableTask::new(
expr,
vec![Partition {
column_list: vec![],
value_list: vec![],
}],
table_info,
)
}
#[test]

View File

@@ -38,7 +38,6 @@ smallvec.workspace = true
snafu.workspace = true
store-api.workspace = true
tokio.workspace = true
tracing.workspace = true
[dev-dependencies]
common-meta = { workspace = true, features = ["testing"] }

View File

@@ -42,7 +42,6 @@ datafusion-common.workspace = true
datafusion-expr.workspace = true
datatypes.workspace = true
dotenv.workspace = true
either.workspace = true
futures.workspace = true
humantime-serde.workspace = true
index.workspace = true
@@ -76,7 +75,6 @@ table.workspace = true
tokio.workspace = true
tokio-stream.workspace = true
tokio-util.workspace = true
tracing.workspace = true
uuid.workspace = true
[dev-dependencies]

View File

@@ -368,7 +368,6 @@ impl CompactionScheduler {
picker_output: picker_output.clone(),
start_time,
waiters,
ttl,
};
let result = remote_job_scheduler

View File

@@ -20,7 +20,6 @@ use api::v1::region::compact_request;
use common_meta::key::SchemaMetadataManagerRef;
use common_telemetry::{info, warn};
use common_time::TimeToLive;
use either::Either;
use itertools::Itertools;
use object_store::manager::ObjectStoreManagerRef;
use serde::{Deserialize, Serialize};
@@ -117,7 +116,7 @@ pub async fn open_compaction_region(
req: &OpenCompactionRegionRequest,
mito_config: &MitoConfig,
object_store_manager: ObjectStoreManagerRef,
ttl_provider: Either<TimeToLive, SchemaMetadataManagerRef>,
schema_metadata_manager: SchemaMetadataManagerRef,
) -> Result<CompactionRegion> {
let object_store = {
let name = &req.region_options.storage;
@@ -198,22 +197,16 @@ pub async fn open_compaction_region(
}
};
let ttl = match ttl_provider {
// Use the specified ttl.
Either::Left(ttl) => ttl,
// Get the ttl from the schema metadata manager.
Either::Right(schema_metadata_manager) => find_ttl(
req.region_id.table_id(),
current_version.options.ttl,
&schema_metadata_manager,
)
.await
.unwrap_or_else(|e| {
warn!(e; "Failed to get ttl for region: {}", region_metadata.region_id);
TimeToLive::default()
}),
};
let ttl = find_ttl(
req.region_id.table_id(),
current_version.options.ttl,
&schema_metadata_manager,
)
.await
.unwrap_or_else(|e| {
warn!(e; "Failed to get ttl for region: {}", region_metadata.region_id);
TimeToLive::default()
});
Ok(CompactionRegion {
region_id: req.region_id,
region_options: req.region_options.clone(),

View File

@@ -19,7 +19,6 @@ use api::v1::Rows;
use common_error::ext::ErrorExt;
use common_error::status_code::StatusCode;
use common_recordbatch::RecordBatches;
use either::Either;
use store_api::region_engine::{RegionEngine, RegionRole};
use store_api::region_request::{
RegionCloseRequest, RegionOpenRequest, RegionPutRequest, RegionRequest,
@@ -475,7 +474,7 @@ async fn test_open_compaction_region() {
&req,
&mito_config,
object_store_manager.clone(),
Either::Right(schema_metadata_manager),
schema_metadata_manager,
)
.await
.unwrap();

View File

@@ -694,7 +694,7 @@ mod tests {
let read_format = ReadFormat::new_with_all_columns(metadata.clone());
let mut batches = VecDeque::new();
read_format
.convert_record_batch(&batch, None, &mut batches)
.convert_record_batch(&batch, &mut batches)
.unwrap();
if !dedup {
assert_eq!(

View File

@@ -17,7 +17,6 @@ use std::sync::Arc;
use std::time::Instant;
use common_telemetry::error;
use common_time::TimeToLive;
use serde::{Deserialize, Serialize};
use snafu::{Location, ResultExt, Snafu};
use store_api::storage::RegionId;
@@ -109,7 +108,6 @@ pub struct CompactionJob {
pub compaction_region: CompactionRegion,
pub picker_output: PickerOutput,
pub start_time: Instant,
pub ttl: TimeToLive,
/// Send the result of the compaction job to these waiters.
pub waiters: Vec<OutputTx>,
}

View File

@@ -95,7 +95,7 @@ mod tests {
use datafusion_common::{Column, ScalarValue};
use datafusion_expr::{col, lit, BinaryExpr, Expr, Operator};
use datatypes::arrow;
use datatypes::arrow::array::{RecordBatch, UInt64Array};
use datatypes::arrow::array::RecordBatch;
use datatypes::arrow::datatypes::{DataType, Field, Schema};
use parquet::arrow::AsyncArrowWriter;
use parquet::basic::{Compression, Encoding, ZstdLevel};
@@ -107,7 +107,7 @@ mod tests {
use super::*;
use crate::access_layer::{FilePathProvider, OperationType, RegionFilePathFactory};
use crate::cache::{CacheManager, CacheStrategy, PageKey};
use crate::read::{BatchBuilder, BatchReader};
use crate::read::BatchReader;
use crate::region::options::{IndexOptions, InvertedIndexOptions};
use crate::sst::file::{FileHandle, FileMeta};
use crate::sst::file_purger::NoopFilePurger;
@@ -120,8 +120,8 @@ mod tests {
use crate::sst::{location, DEFAULT_WRITE_CONCURRENCY};
use crate::test_util::sst_util::{
assert_parquet_metadata_eq, build_test_binary_test_region_metadata, new_batch_by_range,
new_batch_with_binary, new_batch_with_custom_sequence, new_source, sst_file_handle,
sst_file_handle_with_file_id, sst_region_metadata,
new_batch_with_binary, new_source, sst_file_handle, sst_file_handle_with_file_id,
sst_region_metadata,
};
use crate::test_util::{check_reader_result, TestEnv};
@@ -895,84 +895,4 @@ mod tests {
assert!(cached.contains_row_group(2));
assert!(cached.contains_row_group(3));
}
#[tokio::test]
async fn test_read_with_override_sequence() {
let mut env = TestEnv::new().await;
let object_store = env.init_object_store_manager();
let handle = sst_file_handle(0, 1000);
let file_path = FixedPathProvider {
file_id: handle.file_id(),
};
let metadata = Arc::new(sst_region_metadata());
// Create batches with sequence 0 to trigger override functionality
let batch1 = new_batch_with_custom_sequence(&["a", "d"], 0, 60, 0);
let batch2 = new_batch_with_custom_sequence(&["b", "f"], 0, 40, 0);
let source = new_source(&[batch1, batch2]);
let write_opts = WriteOptions {
row_group_size: 50,
..Default::default()
};
let mut writer = ParquetWriter::new_with_object_store(
object_store.clone(),
metadata.clone(),
NoopIndexBuilder,
file_path,
)
.await;
writer
.write_all(source, None, &write_opts)
.await
.unwrap()
.remove(0);
// Read without override sequence (should read sequence 0)
let builder =
ParquetReaderBuilder::new(FILE_DIR.to_string(), handle.clone(), object_store.clone());
let mut reader = builder.build().await.unwrap();
let mut normal_batches = Vec::new();
while let Some(batch) = reader.next_batch().await.unwrap() {
normal_batches.push(batch);
}
// Read with override sequence using FileMeta.sequence
let custom_sequence = 12345u64;
let file_meta = handle.meta_ref();
let mut override_file_meta = file_meta.clone();
override_file_meta.sequence = Some(std::num::NonZero::new(custom_sequence).unwrap());
let override_handle = FileHandle::new(
override_file_meta,
Arc::new(crate::sst::file_purger::NoopFilePurger),
);
let builder =
ParquetReaderBuilder::new(FILE_DIR.to_string(), override_handle, object_store.clone());
let mut reader = builder.build().await.unwrap();
let mut override_batches = Vec::new();
while let Some(batch) = reader.next_batch().await.unwrap() {
override_batches.push(batch);
}
// Compare the results
assert_eq!(normal_batches.len(), override_batches.len());
for (normal, override_batch) in normal_batches.into_iter().zip(override_batches.iter()) {
// Create expected batch with override sequence
let expected_batch = {
let num_rows = normal.num_rows();
let mut builder = BatchBuilder::from(normal);
builder
.sequences_array(Arc::new(UInt64Array::from_value(custom_sequence, num_rows)))
.unwrap();
builder.build().unwrap()
};
// Override batch should match expected batch
assert_eq!(*override_batch, expected_batch);
}
}
}

View File

@@ -146,8 +146,6 @@ pub struct ReadFormat {
/// Field column id to their index in the projected schema (
/// the schema of [Batch]).
field_id_to_projected_index: HashMap<ColumnId, usize>,
/// Sequence number to override the sequence read from the SST.
override_sequence: Option<SequenceNumber>,
}
impl ReadFormat {
@@ -199,15 +197,9 @@ impl ReadFormat {
field_id_to_index,
projection_indices,
field_id_to_projected_index,
override_sequence: None,
}
}
/// Sets the sequence number to override.
pub(crate) fn set_override_sequence(&mut self, sequence: Option<SequenceNumber>) {
self.override_sequence = sequence;
}
/// Gets the arrow schema of the SST file.
///
/// This schema is computed from the region metadata but should be the same
@@ -226,20 +218,12 @@ impl ReadFormat {
&self.projection_indices
}
/// Creates a sequence array to override.
pub(crate) fn new_override_sequence_array(&self, length: usize) -> Option<ArrayRef> {
self.override_sequence
.map(|seq| Arc::new(UInt64Array::from_value(seq, length)) as ArrayRef)
}
/// Convert a arrow record batch into `batches`.
///
/// The length of `override_sequence_array` must be larger than the length of the record batch.
/// Note that the `record_batch` may only contains a subset of columns if it is projected.
pub fn convert_record_batch(
&self,
record_batch: &RecordBatch,
override_sequence_array: Option<&ArrayRef>,
batches: &mut VecDeque<Batch>,
) -> Result<()> {
debug_assert!(batches.is_empty());
@@ -262,23 +246,11 @@ impl ReadFormat {
.take(FIXED_POS_COLUMN_NUM);
// Safety: We have checked the column number.
let op_type_array = fixed_pos_columns.next().unwrap();
let mut sequence_array = fixed_pos_columns.next().unwrap().clone();
let sequence_array = fixed_pos_columns.next().unwrap();
let pk_array = fixed_pos_columns.next().unwrap();
let ts_array = fixed_pos_columns.next().unwrap();
let field_batch_columns = self.get_field_batch_columns(record_batch)?;
// Override sequence array if provided.
if let Some(override_array) = override_sequence_array {
assert!(override_array.len() >= sequence_array.len());
// It's fine to assign the override array directly, but we slice it to make
// sure it matches the length of the original sequence array.
sequence_array = if override_array.len() > sequence_array.len() {
override_array.slice(0, sequence_array.len())
} else {
override_array.clone()
};
}
// Compute primary key offsets.
let pk_dict_array = pk_array
.as_any()
@@ -719,39 +691,6 @@ pub(crate) fn parquet_row_group_time_range(
Some((Timestamp::new(min, unit), Timestamp::new(max, unit)))
}
/// Checks if sequence override is needed based on all row groups' statistics.
/// Returns true if ALL row groups have sequence min-max values of 0.
pub(crate) fn need_override_sequence(parquet_meta: &ParquetMetaData) -> bool {
let num_columns = parquet_meta.file_metadata().schema_descr().num_columns();
if num_columns < FIXED_POS_COLUMN_NUM {
return false;
}
// The sequence column is the second-to-last column (before op_type)
let sequence_pos = num_columns - 2;
// Check all row groups - all must have sequence min-max of 0
for row_group in parquet_meta.row_groups() {
if let Some(Statistics::Int64(value_stats)) = row_group.column(sequence_pos).statistics() {
if let (Some(min_val), Some(max_val)) = (value_stats.min_opt(), value_stats.max_opt()) {
// If any row group doesn't have min=0 and max=0, return false
if *min_val != 0 || *max_val != 0 {
return false;
}
} else {
// If any row group doesn't have statistics, return false
return false;
}
} else {
// If any row group doesn't have Int64 statistics, return false
return false;
}
}
// All row groups have sequence min-max of 0, or there are no row groups
!parquet_meta.row_groups().is_empty()
}
#[cfg(test)]
mod tests {
use api::v1::OpType;
@@ -836,19 +775,9 @@ mod tests {
}
fn new_batch(primary_key: &[u8], start_ts: i64, start_field: i64, num_rows: usize) -> Batch {
new_batch_with_sequence(primary_key, start_ts, start_field, num_rows, TEST_SEQUENCE)
}
fn new_batch_with_sequence(
primary_key: &[u8],
start_ts: i64,
start_field: i64,
num_rows: usize,
sequence: u64,
) -> Batch {
let ts_values = (0..num_rows).map(|i| start_ts + i as i64);
let timestamps = Arc::new(TimestampMillisecondVector::from_values(ts_values));
let sequences = Arc::new(UInt64Vector::from_vec(vec![sequence; num_rows]));
let sequences = Arc::new(UInt64Vector::from_vec(vec![TEST_SEQUENCE; num_rows]));
let op_types = Arc::new(UInt8Vector::from_vec(vec![TEST_OP_TYPE; num_rows]));
let fields = vec![
BatchColumn {
@@ -1001,7 +930,7 @@ mod tests {
let record_batch = RecordBatch::new_empty(arrow_schema);
let mut batches = VecDeque::new();
read_format
.convert_record_batch(&record_batch, None, &mut batches)
.convert_record_batch(&record_batch, &mut batches)
.unwrap();
assert!(batches.is_empty());
}
@@ -1028,7 +957,7 @@ mod tests {
let record_batch = RecordBatch::try_new(arrow_schema, columns).unwrap();
let mut batches = VecDeque::new();
read_format
.convert_record_batch(&record_batch, None, &mut batches)
.convert_record_batch(&record_batch, &mut batches)
.unwrap();
assert_eq!(
@@ -1036,45 +965,4 @@ mod tests {
batches.into_iter().collect::<Vec<_>>(),
);
}
#[test]
fn test_convert_record_batch_with_override_sequence() {
let metadata = build_test_region_metadata();
let column_ids: Vec<_> = metadata
.column_metadatas
.iter()
.map(|col| col.column_id)
.collect();
let read_format = ReadFormat::new(metadata, column_ids.iter().copied());
let columns: Vec<ArrayRef> = vec![
Arc::new(Int64Array::from(vec![1, 1, 10, 10])), // field1
Arc::new(Int64Array::from(vec![2, 2, 11, 11])), // field0
Arc::new(TimestampMillisecondArray::from(vec![1, 2, 11, 12])), // ts
build_test_pk_array(&[(b"one".to_vec(), 2), (b"two".to_vec(), 2)]), // primary key
Arc::new(UInt64Array::from(vec![TEST_SEQUENCE; 4])), // sequence
Arc::new(UInt8Array::from(vec![TEST_OP_TYPE; 4])), // op type
];
let arrow_schema = build_test_arrow_schema();
let record_batch = RecordBatch::try_new(arrow_schema, columns).unwrap();
// Create override sequence array with custom values
let override_sequence: u64 = 12345;
let override_sequence_array: ArrayRef =
Arc::new(UInt64Array::from_value(override_sequence, 4));
let mut batches = VecDeque::new();
read_format
.convert_record_batch(&record_batch, Some(&override_sequence_array), &mut batches)
.unwrap();
// Create expected batches with override sequence
let expected_batch1 = new_batch_with_sequence(b"one", 1, 1, 2, override_sequence);
let expected_batch2 = new_batch_with_sequence(b"two", 11, 10, 2, override_sequence);
assert_eq!(
vec![expected_batch1, expected_batch2],
batches.into_iter().collect::<Vec<_>>(),
);
}
}

View File

@@ -23,7 +23,6 @@ use async_trait::async_trait;
use common_recordbatch::filter::SimpleFilterEvaluator;
use common_telemetry::{debug, warn};
use datafusion_expr::Expr;
use datatypes::arrow::array::ArrayRef;
use datatypes::arrow::error::ArrowError;
use datatypes::arrow::record_batch::RecordBatch;
use datatypes::data_type::ConcreteDataType;
@@ -55,7 +54,7 @@ use crate::sst::index::bloom_filter::applier::BloomFilterIndexApplierRef;
use crate::sst::index::fulltext_index::applier::FulltextIndexApplierRef;
use crate::sst::index::inverted_index::applier::InvertedIndexApplierRef;
use crate::sst::parquet::file_range::{FileRangeContext, FileRangeContextRef};
use crate::sst::parquet::format::{need_override_sequence, ReadFormat};
use crate::sst::parquet::format::ReadFormat;
use crate::sst::parquet::metadata::MetadataLoader;
use crate::sst::parquet::row_group::InMemoryRowGroup;
use crate::sst::parquet::row_selection::RowGroupSelection;
@@ -221,7 +220,7 @@ impl ParquetReaderBuilder {
let key_value_meta = parquet_meta.file_metadata().key_value_metadata();
// Gets the metadata stored in the SST.
let region_meta = Arc::new(Self::get_region_metadata(&file_path, key_value_meta)?);
let mut read_format = if let Some(column_ids) = &self.projection {
let read_format = if let Some(column_ids) = &self.projection {
ReadFormat::new(region_meta.clone(), column_ids.iter().copied())
} else {
// Lists all column ids to read, we always use the expected metadata if possible.
@@ -234,10 +233,6 @@ impl ParquetReaderBuilder {
.map(|col| col.column_id),
)
};
if need_override_sequence(&parquet_meta) {
read_format
.set_override_sequence(self.file_handle.meta_ref().sequence.map(|x| x.get()));
}
// Computes the projection mask.
let parquet_schema_desc = parquet_meta.file_metadata().schema_descr();
@@ -1235,7 +1230,12 @@ pub(crate) type RowGroupReader = RowGroupReaderBase<FileRangeContextRef>;
impl RowGroupReader {
/// Creates a new reader from file range.
pub(crate) fn new(context: FileRangeContextRef, reader: ParquetRecordBatchReader) -> Self {
Self::create(context, reader)
Self {
context,
reader,
batches: VecDeque::new(),
metrics: ReaderMetrics::default(),
}
}
}
@@ -1249,8 +1249,6 @@ pub(crate) struct RowGroupReaderBase<T> {
batches: VecDeque<Batch>,
/// Local scan metrics.
metrics: ReaderMetrics,
/// Cached sequence array to override sequences.
override_sequence: Option<ArrayRef>,
}
impl<T> RowGroupReaderBase<T>
@@ -1259,16 +1257,11 @@ where
{
/// Creates a new reader.
pub(crate) fn create(context: T, reader: ParquetRecordBatchReader) -> Self {
// The batch length from the reader should be less than or equal to DEFAULT_READ_BATCH_SIZE.
let override_sequence = context
.read_format()
.new_override_sequence_array(DEFAULT_READ_BATCH_SIZE);
Self {
context,
reader,
batches: VecDeque::new(),
metrics: ReaderMetrics::default(),
override_sequence,
}
}
@@ -1304,11 +1297,9 @@ where
};
self.metrics.num_record_batches += 1;
self.context.read_format().convert_record_batch(
&record_batch,
self.override_sequence.as_ref(),
&mut self.batches,
)?;
self.context
.read_format()
.convert_record_batch(&record_batch, &mut self.batches)?;
self.metrics.num_batches += self.batches.len();
}
let batch = self.batches.pop_front();

View File

@@ -72,7 +72,7 @@ use crate::error::Result;
use crate::flush::{WriteBufferManager, WriteBufferManagerRef};
use crate::manifest::manager::{RegionManifestManager, RegionManifestOptions};
use crate::read::{Batch, BatchBuilder, BatchReader};
use crate::sst::file_purger::{FilePurgerRef, NoopFilePurger};
use crate::sst::file_purger::{FilePurger, FilePurgerRef, NoopFilePurger, PurgeRequest};
use crate::sst::index::intermediate::IntermediateManager;
use crate::sst::index::puffin_manager::PuffinManagerFactory;
use crate::time_provider::{StdTimeProvider, TimeProviderRef};

View File

@@ -138,17 +138,11 @@ pub fn sst_file_handle(start_ms: i64, end_ms: i64) -> FileHandle {
sst_file_handle_with_file_id(FileId::random(), start_ms, end_ms)
}
/// Creates a new batch with custom sequence for testing.
pub fn new_batch_with_custom_sequence(
tags: &[&str],
start: usize,
end: usize,
sequence: u64,
) -> Batch {
pub fn new_batch_by_range(tags: &[&str], start: usize, end: usize) -> Batch {
assert!(end >= start);
let pk = new_primary_key(tags);
let timestamps: Vec<_> = (start..end).map(|v| v as i64).collect();
let sequences = vec![sequence; end - start];
let sequences = vec![1000; end - start];
let op_types = vec![OpType::Put; end - start];
let field: Vec<_> = (start..end).map(|v| v as u64).collect();
new_batch_builder(&pk, &timestamps, &sequences, &op_types, 2, &field)
@@ -156,10 +150,6 @@ pub fn new_batch_with_custom_sequence(
.unwrap()
}
pub fn new_batch_by_range(tags: &[&str], start: usize, end: usize) -> Batch {
new_batch_with_custom_sequence(tags, start, end, 1000)
}
pub fn new_batch_with_binary(tags: &[&str], start: usize, end: usize) -> Batch {
assert!(end >= start);
let pk = new_primary_key(tags);

View File

@@ -69,7 +69,6 @@ table.workspace = true
tokio.workspace = true
tokio-util.workspace = true
tonic.workspace = true
tracing.workspace = true
[dev-dependencies]
common-meta = { workspace = true, features = ["testing"] }

View File

@@ -654,7 +654,7 @@ impl StatementExecutor {
ctx.clone(),
)?;
// TODO(dennis): validate the logical plan
//TODO(dennis): validate the logical plan
self.create_view_by_expr(expr, ctx).await
}
@@ -1389,11 +1389,12 @@ impl StatementExecutor {
async fn create_table_procedure(
&self,
create_table: CreateTableExpr,
partitions: Option<Partition>,
partitions: Vec<Partition>,
table_info: RawTableInfo,
query_context: QueryContextRef,
) -> Result<SubmitDdlTaskResponse> {
let partitions = partitions.map(|p| p.into()); // to PbPartition
let partitions = partitions.into_iter().map(Into::into).collect();
let request = SubmitDdlTaskRequest {
query_context,
task: DdlTask::new_create_table(create_table, partitions, table_info),
@@ -1589,7 +1590,7 @@ fn parse_partitions(
create_table: &CreateTableExpr,
partitions: Option<Partitions>,
query_ctx: &QueryContextRef,
) -> Result<(Option<MetaPartition>, Vec<String>)> {
) -> Result<(Vec<MetaPartition>, Vec<String>)> {
// If partitions are not defined by user, use the timestamp column (which has to be existed) as
// the partition column, and create only one partition.
let partition_columns = find_partition_columns(&partitions)?;
@@ -1599,26 +1600,23 @@ fn parse_partitions(
// Validates partition
let mut exprs = vec![];
for partition in &partition_entries {
if let PartitionBound::Expr(expr) = partition {
exprs.push(expr.clone());
for bound in partition {
if let PartitionBound::Expr(expr) = bound {
exprs.push(expr.clone());
}
}
}
MultiDimPartitionRule::try_new(partition_columns.clone(), vec![], exprs, true)
.context(InvalidPartitionSnafu)?;
let meta_partition = if partition_entries.is_empty() {
None
} else {
Some(
MetaPartition::try_from(PartitionDef::new(
partition_columns.clone(),
partition_entries,
))
Ok((
partition_entries
.into_iter()
.map(|x| MetaPartition::try_from(PartitionDef::new(partition_columns.clone(), x)))
.collect::<std::result::Result<_, _>>()
.context(DeserializePartitionSnafu)?,
)
};
Ok((meta_partition, partition_columns))
partition_columns,
))
}
fn create_table_info(
@@ -1729,7 +1727,7 @@ fn find_partition_entries(
partitions: &Option<Partitions>,
partition_columns: &[String],
query_ctx: &QueryContextRef,
) -> Result<Vec<PartitionBound>> {
) -> Result<Vec<Vec<PartitionBound>>> {
let entries = if let Some(partitions) = partitions {
// extract concrete data type of partition columns
let column_defs = partition_columns
@@ -1758,17 +1756,17 @@ fn find_partition_entries(
for partition in &partitions.exprs {
let partition_expr =
convert_one_expr(partition, &column_name_and_type, &query_ctx.timezone())?;
partition_exprs.push(PartitionBound::Expr(partition_expr));
partition_exprs.push(vec![PartitionBound::Expr(partition_expr)]);
}
// fallback for no expr
if partition_exprs.is_empty() {
partition_exprs.push(PartitionBound::MaxValue);
partition_exprs.push(vec![PartitionBound::MaxValue]);
}
partition_exprs
} else {
vec![PartitionBound::MaxValue]
vec![vec![PartitionBound::MaxValue]]
};
Ok(entries)
}
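After this change every partition carries its own Vec of bounds and parse_partitions always yields a Vec<MetaPartition> instead of an Option. A rough, self-contained sketch of that mapping, with simplified stand-ins for PartitionBound/PartitionDef/MetaPartition (the real types and serialization live in the partition crate):

// Stand-in types; names and fields are illustrative only.
enum PartitionBound {
    Expr(String),
    MaxValue,
}

struct PartitionDef {
    columns: Vec<String>,
    bounds: Vec<PartitionBound>,
}

struct MetaPartition {
    serialized: String,
}

impl TryFrom<PartitionDef> for MetaPartition {
    type Error = String;
    fn try_from(def: PartitionDef) -> Result<Self, Self::Error> {
        // The real conversion serializes the definition; this just summarizes it.
        Ok(MetaPartition {
            serialized: format!("{} bound(s) over [{}]", def.bounds.len(), def.columns.join(", ")),
        })
    }
}

// One entry per partition, each entry holding that partition's own bounds.
fn to_meta_partitions(
    columns: Vec<String>,
    entries: Vec<Vec<PartitionBound>>,
) -> Result<Vec<MetaPartition>, String> {
    entries
        .into_iter()
        .map(|bounds| {
            MetaPartition::try_from(PartitionDef {
                columns: columns.clone(),
                bounds,
            })
        })
        .collect()
}

fn main() {
    let entries = vec![
        vec![PartitionBound::Expr("a < 10".to_string())],
        vec![PartitionBound::MaxValue],
    ];
    let metas = to_meta_partitions(vec!["a".to_string()], entries).unwrap();
    assert_eq!(metas.len(), 2);
    assert!(!metas[0].serialized.is_empty());
}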


@@ -48,21 +48,17 @@ pub trait PartitionRule: Sync + Send {
) -> Result<HashMap<RegionNumber, RegionMask>>;
}
/// The bound of one partition.
/// The right bound(exclusive) of partition range.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
pub enum PartitionBound {
/// Deprecated since 0.9.0.
Value(Value),
/// Deprecated since 0.15.0.
MaxValue,
Expr(crate::expr::PartitionExpr),
}
/// The partition definition of one table.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PartitionDef {
partition_columns: Vec<String>,
/// Each element represents one partition.
partition_bounds: Vec<PartitionBound>,
}


@@ -47,7 +47,6 @@ lazy_static.workspace = true
moka = { workspace = true, features = ["sync"] }
once_cell.workspace = true
operator.workspace = true
ordered-float.workspace = true
paste.workspace = true
prometheus.workspace = true
query.workspace = true
@@ -60,7 +59,7 @@ sql.workspace = true
table.workspace = true
tokio.workspace = true
urlencoding = "2.1"
vrl.workspace = true
vrl = "0.24"
yaml-rust = "0.4"
[dev-dependencies]


@@ -16,21 +16,23 @@ use std::sync::Arc;
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use pipeline::error::Result;
use pipeline::{parse, setup_pipeline, Content, Pipeline, PipelineContext, SchemaInfo};
use serde_json::Deserializer;
use vrl::value::Value as VrlValue;
use pipeline::{
json_to_map, parse, setup_pipeline, Content, Pipeline, PipelineContext, SchemaInfo,
};
use serde_json::{Deserializer, Value};
fn processor_mut(
pipeline: Arc<Pipeline>,
pipeline_ctx: &PipelineContext<'_>,
schema_info: &mut SchemaInfo,
input_values: Vec<VrlValue>,
input_values: Vec<Value>,
) -> Result<Vec<greptime_proto::v1::Row>> {
let mut result = Vec::with_capacity(input_values.len());
for v in input_values {
let payload = json_to_map(v).unwrap();
let r = pipeline
.exec_mut(v, pipeline_ctx, schema_info)?
.exec_mut(payload, pipeline_ctx, schema_info)?
.into_transformed()
.expect("expect transformed result ");
result.push(r.0);
@@ -235,7 +237,7 @@ transform:
fn criterion_benchmark(c: &mut Criterion) {
let input_value_str = include_str!("./data.log");
let input_value = Deserializer::from_str(input_value_str)
.into_iter::<VrlValue>()
.into_iter::<serde_json::Value>()
.collect::<std::result::Result<Vec<_>, _>>()
.unwrap();
let pipeline = prepare_pipeline();


@@ -14,7 +14,6 @@
use common_telemetry::debug;
use snafu::OptionExt;
use vrl::value::Value as VrlValue;
use yaml_rust::Yaml;
use crate::error::{
@@ -22,7 +21,7 @@ use crate::error::{
ValueRequiredForDispatcherRuleSnafu,
};
use crate::etl::ctx_req::TABLE_SUFFIX_KEY;
use crate::etl::value::yaml_to_vrl_value;
use crate::Value;
const FIELD: &str = "field";
const PIPELINE: &str = "pipeline";
@@ -63,7 +62,7 @@ pub(crate) struct Dispatcher {
/// name
#[derive(Debug, PartialEq)]
pub(crate) struct Rule {
pub value: VrlValue,
pub value: Value,
pub table_suffix: String,
pub pipeline: Option<String>,
}
@@ -91,8 +90,7 @@ impl TryFrom<&Yaml> for Dispatcher {
if rule[VALUE].is_badvalue() {
ValueRequiredForDispatcherRuleSnafu.fail()?;
}
let value = yaml_to_vrl_value(&rule[VALUE])?;
let value = Value::try_from(&rule[VALUE])?;
Ok(Rule {
value,
@@ -111,9 +109,8 @@ impl TryFrom<&Yaml> for Dispatcher {
impl Dispatcher {
/// execute dispatcher and returns matched rule if any
pub(crate) fn exec(&self, data: &VrlValue) -> Option<&Rule> {
let data = data.as_object()?;
if let Some(value) = data.get(self.field.as_str()) {
pub(crate) fn exec(&self, data: &Value) -> Option<&Rule> {
if let Some(value) = data.get(&self.field) {
for rule in &self.rules {
if rule.value == *value {
return Some(rule);
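The dispatcher now matches directly on the intermediate Value map: it looks up the configured field and returns the first rule whose value is equal. A self-contained sketch of that matching with toy Value/Rule types (not the pipeline crate's own):

use std::collections::BTreeMap;

// Toy stand-ins for the pipeline's Value and dispatcher Rule.
#[derive(Debug, Clone, PartialEq)]
enum Value {
    String(String),
    Null,
}

struct Rule {
    value: Value,
    table_suffix: String,
}

struct Dispatcher {
    field: String,
    rules: Vec<Rule>,
}

impl Dispatcher {
    // Return the first rule whose value equals the dispatched field's value.
    fn exec<'a>(&'a self, data: &BTreeMap<String, Value>) -> Option<&'a Rule> {
        let value = data.get(&self.field)?;
        self.rules.iter().find(|rule| rule.value == *value)
    }
}

fn main() {
    let dispatcher = Dispatcher {
        field: "type".to_string(),
        rules: vec![Rule {
            value: Value::String("http".to_string()),
            table_suffix: "http_events".to_string(),
        }],
    };
    let mut data = BTreeMap::new();
    data.insert("type".to_string(), Value::String("http".to_string()));
    data.insert("other".to_string(), Value::Null);
    let matched = dispatcher.exec(&data).map(|r| r.table_suffix.as_str());
    assert_eq!(matched, Some("http_events"));
}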


@@ -62,7 +62,7 @@ pub enum Error {
#[snafu(display("Processor {processor}: expect string value, but got {v:?}"))]
ProcessorExpectString {
processor: String,
v: vrl::value::Value,
v: crate::Value,
#[snafu(implicit)]
location: Location,
},
@@ -229,6 +229,12 @@ pub enum Error {
location: Location,
},
#[snafu(display("Failed to get timestamp"))]
DateFailedToGetTimestamp {
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Invalid Pattern: '{s}'. {detail}"))]
DissectInvalidPattern {
s: String,
@@ -366,6 +372,13 @@ pub enum Error {
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Url decoding error"))]
UrlEncodingDecode {
#[snafu(source)]
error: std::string::FromUtf8Error,
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Invalid transform on_failure value: {value}"))]
TransformOnFailureInvalidValue {
value: String,
@@ -420,6 +433,17 @@ pub enum Error {
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Null type not supported"))]
CoerceUnsupportedNullType {
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Null type not supported when to coerce '{ty}' type"))]
CoerceUnsupportedNullTypeTo {
ty: String,
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Type: {ty} value not supported for Epoch"))]
CoerceUnsupportedEpochType {
ty: String,
@@ -532,6 +556,12 @@ pub enum Error {
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Input value must be an object"))]
InputValueMustBeObject {
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Column options error"))]
ColumnOptions {
#[snafu(source)]
@@ -545,6 +575,12 @@ pub enum Error {
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Unsupported number type: {value:?}"))]
UnsupportedNumberType {
value: serde_json::Number,
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Failed to parse json"))]
JsonParse {
#[snafu(source)]
@@ -658,6 +694,14 @@ pub enum Error {
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Float is not a number: {}", input_float))]
FloatNaN {
input_float: f64,
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Invalid timestamp value: {}", input))]
InvalidTimestamp {
input: String,
@@ -665,13 +709,14 @@ pub enum Error {
location: Location,
},
#[snafu(display("Invalid epoch value '{}' for resolution '{}'", value, resolution))]
InvalidEpochForResolution {
value: i64,
resolution: String,
#[snafu(display("Failed to convert bytes to utf8"))]
BytesToUtf8 {
#[snafu(source)]
error: std::string::FromUtf8Error,
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Please don't use regex in Vrl script"))]
VrlRegexValue {
#[snafu(implicit)]
@@ -763,21 +808,6 @@ pub enum Error {
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Float is NaN"))]
FloatIsNan {
#[snafu(source)]
error: ordered_float::FloatIsNan,
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Unsupported type in pipeline: {}", ty))]
UnsupportedTypeInPipeline {
ty: String,
#[snafu(implicit)]
location: Location,
},
}
pub type Result<T> = std::result::Result<T, Error>;
@@ -828,6 +858,7 @@ impl ErrorExt for Error {
| DateParseTimezone { .. }
| DateParse { .. }
| DateFailedToGetLocalTimezone { .. }
| DateFailedToGetTimestamp { .. }
| DissectInvalidPattern { .. }
| DissectEmptyPattern { .. }
| DissectSplitExceedsInput { .. }
@@ -850,6 +881,7 @@ impl ErrorExt for Error {
| RegexNoValidPattern { .. }
| UrlEncodingInvalidMethod { .. }
| DigestPatternInvalid { .. }
| UrlEncodingDecode { .. }
| TransformOnFailureInvalidValue { .. }
| TransformElementMustBeMap { .. }
| TransformFieldMustBeSet { .. }
@@ -859,6 +891,8 @@ impl ErrorExt for Error {
| TransformTimestampIndexCount { .. }
| AutoTransformOneTimestamp { .. }
| InvalidVersionNumber { .. }
| CoerceUnsupportedNullType { .. }
| CoerceUnsupportedNullTypeTo { .. }
| CoerceUnsupportedEpochType { .. }
| CoerceStringToType { .. }
| CoerceJsonTypeTo { .. }
@@ -874,8 +908,10 @@ impl ErrorExt for Error {
| ValueYamlKeyMustBeString { .. }
| YamlLoad { .. }
| YamlParse { .. }
| InputValueMustBeObject { .. }
| ColumnOptions { .. }
| UnsupportedIndexType { .. }
| UnsupportedNumberType { .. }
| IdentifyPipelineColumnTypeMismatch { .. }
| JsonParse { .. }
| JsonPathParse { .. }
@@ -888,14 +924,12 @@ impl ErrorExt for Error {
| InvalidTableSuffixTemplate { .. }
| CompileVrl { .. }
| ExecuteVrl { .. }
| FloatNaN { .. }
| BytesToUtf8 { .. }
| InvalidTimestamp { .. }
| VrlRegexValue { .. }
| VrlReturnValue { .. }
| PipelineMissing { .. } => StatusCode::InvalidArguments,
FloatIsNan { .. }
| InvalidEpochForResolution { .. }
| UnsupportedTypeInPipeline { .. } => StatusCode::InvalidArguments,
}
}


@@ -19,19 +19,21 @@ pub mod processor;
pub mod transform;
pub mod value;
use std::collections::BTreeMap;
use api::v1::Row;
use common_time::timestamp::TimeUnit;
use itertools::Itertools;
use processor::{Processor, Processors};
use snafu::{ensure, OptionExt, ResultExt};
use transform::Transforms;
use vrl::core::Value as VrlValue;
use value::Value;
use yaml_rust::{Yaml, YamlLoader};
use crate::dispatcher::{Dispatcher, Rule};
use crate::error::{
AutoTransformOneTimestampSnafu, Error, IntermediateKeyIndexSnafu, InvalidVersionNumberSnafu,
Result, YamlLoadSnafu, YamlParseSnafu,
AutoTransformOneTimestampSnafu, Error, InputValueMustBeObjectSnafu, IntermediateKeyIndexSnafu,
InvalidVersionNumberSnafu, Result, YamlLoadSnafu, YamlParseSnafu,
};
use crate::etl::processor::ProcessorKind;
use crate::etl::transform::transformer::greptime::values_to_row;
@@ -226,7 +228,7 @@ impl DispatchedTo {
#[derive(Debug)]
pub enum PipelineExecOutput {
Transformed(TransformedOutput),
DispatchedTo(DispatchedTo, VrlValue),
DispatchedTo(DispatchedTo, Value),
}
#[derive(Debug)]
@@ -259,6 +261,40 @@ impl PipelineExecOutput {
}
}
pub fn json_to_map(val: serde_json::Value) -> Result<Value> {
match val {
serde_json::Value::Object(map) => {
let mut intermediate_state = BTreeMap::new();
for (k, v) in map {
intermediate_state.insert(k, Value::try_from(v)?);
}
Ok(Value::Map(intermediate_state.into()))
}
_ => InputValueMustBeObjectSnafu.fail(),
}
}
pub fn json_array_to_map(val: Vec<serde_json::Value>) -> Result<Vec<Value>> {
val.into_iter().map(json_to_map).collect()
}
pub fn simd_json_to_map(val: simd_json::OwnedValue) -> Result<Value> {
match val {
simd_json::OwnedValue::Object(map) => {
let mut intermediate_state = BTreeMap::new();
for (k, v) in map.into_iter() {
intermediate_state.insert(k, Value::try_from(v)?);
}
Ok(Value::Map(intermediate_state.into()))
}
_ => InputValueMustBeObjectSnafu.fail(),
}
}
pub fn simd_json_array_to_map(val: Vec<simd_json::OwnedValue>) -> Result<Vec<Value>> {
val.into_iter().map(simd_json_to_map).collect()
}
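These helpers only accept a top-level JSON object; anything else fails with InputValueMustBeObject. A small stand-alone sketch of the same contract over serde_json alone (a hypothetical object_to_map instead of the pipeline's json_to_map and internal Value type), fed NDJSON-style input the way the benchmark above does:

use std::collections::BTreeMap;

// Sketch: accept only top-level objects, mirroring json_to_map's contract.
fn object_to_map(val: serde_json::Value) -> Result<BTreeMap<String, serde_json::Value>, String> {
    match val {
        serde_json::Value::Object(map) => Ok(map.into_iter().collect()),
        other => Err(format!("input value must be an object, got {other}")),
    }
}

fn main() {
    // One JSON object per line.
    let input = "{\"message\":\"hello\"}\n{\"message\":\"world\"}";
    let values: Vec<BTreeMap<String, serde_json::Value>> = serde_json::Deserializer::from_str(input)
        .into_iter::<serde_json::Value>()
        .map(|v| object_to_map(v.expect("valid json")))
        .collect::<Result<_, _>>()
        .expect("objects only");
    assert_eq!(values.len(), 2);
}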
impl Pipeline {
fn is_v1(&self) -> bool {
self.doc_version == PipelineDocVersion::V1
@@ -266,7 +302,7 @@ impl Pipeline {
pub fn exec_mut(
&self,
mut val: VrlValue,
mut val: Value,
pipeline_ctx: &PipelineContext<'_>,
schema_info: &mut SchemaInfo,
) -> Result<PipelineExecOutput> {
@@ -297,9 +333,9 @@ impl Pipeline {
table_suffix,
}));
}
// continue v2 process, and set the rest fields with auto-transform
// continue v2 process, check ts column and set the rest fields with auto-transform
// if transformer presents, then ts has been set
values_to_row(schema_info, val, pipeline_ctx, Some(values), false)?
values_to_row(schema_info, val, pipeline_ctx, Some(values))?
}
TransformerMode::AutoTransform(ts_name, time_unit) => {
// infer ts from the context
@@ -311,7 +347,7 @@ impl Pipeline {
));
let n_ctx =
PipelineContext::new(&def, pipeline_ctx.pipeline_param, pipeline_ctx.channel);
values_to_row(schema_info, val, &n_ctx, None, true)?
values_to_row(schema_info, val, &n_ctx, None)?
}
};
@@ -373,14 +409,11 @@ macro_rules! setup_pipeline {
}
#[cfg(test)]
mod tests {
use std::collections::BTreeMap;
use std::sync::Arc;
use api::v1::Rows;
use greptime_proto::v1::value::ValueData;
use greptime_proto::v1::{self, ColumnDataType, SemanticType};
use vrl::prelude::Bytes;
use vrl::value::KeyString;
use super::*;
@@ -421,7 +454,7 @@ transform:
session::context::Channel::Unknown,
);
let payload = input_value.into();
let payload = json_to_map(input_value).unwrap();
let result = pipeline
.exec_mut(payload, &pipeline_ctx, &mut schema_info)
.unwrap()
@@ -482,10 +515,9 @@ transform:
&pipeline_param,
session::context::Channel::Unknown,
);
let payload = VrlValue::Object(BTreeMap::from([(
KeyString::from("message"),
VrlValue::Bytes(Bytes::from(message)),
)]));
let mut payload = BTreeMap::new();
payload.insert("message".to_string(), Value::String(message));
let payload = Value::Map(payload.into());
let result = pipeline
.exec_mut(payload, &pipeline_ctx, &mut schema_info)
@@ -581,7 +613,7 @@ transform:
session::context::Channel::Unknown,
);
let payload = input_value.into();
let payload = json_to_map(input_value).unwrap();
let result = pipeline
.exec_mut(payload, &pipeline_ctx, &mut schema_info)
.unwrap()
@@ -634,7 +666,7 @@ transform:
session::context::Channel::Unknown,
);
let schema = pipeline.schemas().unwrap().clone();
let result = input_value.into();
let result = json_to_map(input_value).unwrap();
let row = pipeline
.exec_mut(result, &pipeline_ctx, &mut schema_info)
@@ -700,7 +732,7 @@ transform:
assert_eq!(
dispatcher.rules[0],
crate::dispatcher::Rule {
value: VrlValue::Bytes(Bytes::from("http")),
value: Value::String("http".to_string()),
table_suffix: "http_events".to_string(),
pipeline: None
}
@@ -709,7 +741,7 @@ transform:
assert_eq!(
dispatcher.rules[1],
crate::dispatcher::Rule {
value: VrlValue::Bytes(Bytes::from("database")),
value: Value::String("database".to_string()),
table_suffix: "db_events".to_string(),
pipeline: Some("database_pipeline".to_string()),
}


@@ -19,10 +19,10 @@ use ahash::{HashMap, HashMapExt};
use api::v1::{RowInsertRequest, RowInsertRequests, Rows};
use session::context::{QueryContext, QueryContextRef};
use snafu::OptionExt;
use vrl::value::Value as VrlValue;
use crate::error::{Result, ValueMustBeMapSnafu};
use crate::tablesuffix::TableSuffixTemplate;
use crate::Value;
const GREPTIME_AUTO_CREATE_TABLE: &str = "greptime_auto_create_table";
const GREPTIME_TTL: &str = "greptime_ttl";
@@ -86,34 +86,32 @@ impl ContextOpt {
}
impl ContextOpt {
pub fn from_pipeline_map_to_opt(value: &mut VrlValue) -> Result<Self> {
let map = value.as_object_mut().context(ValueMustBeMapSnafu)?;
pub fn from_pipeline_map_to_opt(pipeline_map: &mut Value) -> Result<Self> {
let pipeline_map = pipeline_map.as_map_mut().context(ValueMustBeMapSnafu)?;
let mut opt = Self::default();
for k in PIPELINE_HINT_KEYS {
if let Some(v) = map.remove(k) {
let v = v.to_string_lossy().to_string();
if let Some(v) = pipeline_map.remove(k) {
match k {
GREPTIME_AUTO_CREATE_TABLE => {
opt.auto_create_table = Some(v);
opt.auto_create_table = Some(v.to_str_value());
}
GREPTIME_TTL => {
opt.ttl = Some(v);
opt.ttl = Some(v.to_str_value());
}
GREPTIME_APPEND_MODE => {
opt.append_mode = Some(v);
opt.append_mode = Some(v.to_str_value());
}
GREPTIME_MERGE_MODE => {
opt.merge_mode = Some(v);
opt.merge_mode = Some(v.to_str_value());
}
GREPTIME_PHYSICAL_TABLE => {
opt.physical_table = Some(v);
opt.physical_table = Some(v.to_str_value());
}
GREPTIME_SKIP_WAL => {
opt.skip_wal = Some(v);
opt.skip_wal = Some(v.to_str_value());
}
GREPTIME_TABLE_SUFFIX => {
opt.table_suffix = Some(v);
opt.table_suffix = Some(v.to_str_value());
}
_ => {}
}
@@ -125,7 +123,7 @@ impl ContextOpt {
pub(crate) fn resolve_table_suffix(
&mut self,
table_suffix: Option<&TableSuffixTemplate>,
pipeline_map: &VrlValue,
pipeline_map: &Value,
) -> Option<String> {
self.table_suffix
.take()
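The greptime_* hint keys are removed from the pipeline map before rows are built, so they become per-request options rather than columns. A simplified sketch of that extraction with a plain string map standing in for the pipeline's Value map (key names here are a subset, and the options struct is illustrative):

use std::collections::BTreeMap;

// Hypothetical subset of the hint keys handled above.
const HINT_KEYS: [&str; 3] = ["greptime_ttl", "greptime_append_mode", "greptime_table_suffix"];

#[derive(Debug, Default)]
struct ContextOpt {
    ttl: Option<String>,
    append_mode: Option<String>,
    table_suffix: Option<String>,
}

// Remove hint entries from the map and record them as options.
fn extract_opts(map: &mut BTreeMap<String, String>) -> ContextOpt {
    let mut opt = ContextOpt::default();
    for key in HINT_KEYS {
        if let Some(v) = map.remove(key) {
            match key {
                "greptime_ttl" => opt.ttl = Some(v),
                "greptime_append_mode" => opt.append_mode = Some(v),
                "greptime_table_suffix" => opt.table_suffix = Some(v),
                _ => {}
            }
        }
    }
    opt
}

fn main() {
    let mut map = BTreeMap::from([
        ("greptime_ttl".to_string(), "7d".to_string()),
        ("message".to_string(), "hello".to_string()),
    ]);
    let opt = extract_opts(&mut map);
    assert_eq!(opt.ttl.as_deref(), Some("7d"));
    assert!(map.contains_key("message"));
    assert!(!map.contains_key("greptime_ttl"));
}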


@@ -28,7 +28,7 @@ pub mod regex;
pub mod select;
pub mod simple_extract;
pub mod urlencoding;
pub mod vrl_processor;
pub mod vrl;
use std::str::FromStr;
@@ -47,7 +47,6 @@ use letter::LetterProcessor;
use regex::RegexProcessor;
use snafu::{OptionExt, ResultExt};
use urlencoding::UrlEncodingProcessor;
use vrl::value::Value as VrlValue;
use crate::error::{
Error, FailedParseFieldFromStringSnafu, FieldMustBeTypeSnafu, InvalidFieldRenameSnafu,
@@ -58,7 +57,8 @@ use crate::etl::field::{Field, Fields};
use crate::etl::processor::json_parse::JsonParseProcessor;
use crate::etl::processor::select::SelectProcessor;
use crate::etl::processor::simple_extract::SimpleExtractProcessor;
use crate::etl::processor::vrl_processor::VrlProcessor;
use crate::etl::processor::vrl::VrlProcessor;
use crate::Value;
const FIELD_NAME: &str = "field";
const FIELDS_NAME: &str = "fields";
@@ -123,7 +123,7 @@ pub trait Processor: std::fmt::Debug + Send + Sync + 'static {
fn ignore_missing(&self) -> bool;
/// Execute the processor on a vector which be preprocessed by the pipeline
fn exec_mut(&self, val: VrlValue) -> Result<VrlValue>;
fn exec_mut(&self, val: Value) -> Result<Value>;
}
#[derive(Debug)]
@@ -224,7 +224,7 @@ fn parse_processor(doc: &yaml_rust::Yaml) -> Result<ProcessorKind> {
json_parse::PROCESSOR_JSON_PARSE => {
ProcessorKind::JsonParse(JsonParseProcessor::try_from(value)?)
}
vrl_processor::PROCESSOR_VRL => ProcessorKind::Vrl(VrlProcessor::try_from(value)?),
vrl::PROCESSOR_VRL => ProcessorKind::Vrl(VrlProcessor::try_from(value)?),
select::PROCESSOR_SELECT => ProcessorKind::Select(SelectProcessor::try_from(value)?),
_ => return UnsupportedProcessorSnafu { processor: str_key }.fail(),
};


@@ -18,22 +18,20 @@
use std::collections::BTreeMap;
use ordered_float::NotNan;
use snafu::{OptionExt, ResultExt};
use urlencoding::decode;
use vrl::prelude::Bytes;
use vrl::value::{KeyString, Value as VrlValue};
use crate::error::{
CmcdMissingKeySnafu, CmcdMissingValueSnafu, Error, FailedToParseFloatKeySnafu,
FailedToParseIntKeySnafu, FloatIsNanSnafu, KeyMustBeStringSnafu, ProcessorExpectStringSnafu,
ProcessorMissingFieldSnafu, Result, ValueMustBeMapSnafu,
FailedToParseIntKeySnafu, KeyMustBeStringSnafu, ProcessorExpectStringSnafu,
ProcessorMissingFieldSnafu, Result,
};
use crate::etl::field::Fields;
use crate::etl::processor::{
yaml_bool, yaml_new_field, yaml_new_fields, Processor, FIELDS_NAME, FIELD_NAME,
IGNORE_MISSING_NAME,
};
use crate::etl::value::Value;
pub(crate) const PROCESSOR_CMCD: &str = "cmcd";
@@ -78,43 +76,42 @@ const CMCD_KEYS: [&str; 18] = [
];
/// function to resolve CMCD_KEY_BS | CMCD_KEY_SU
fn bs_su(_: &str, _: &str, _: Option<&str>) -> Result<VrlValue> {
Ok(VrlValue::Boolean(true))
fn bs_su(_: &str, _: &str, _: Option<&str>) -> Result<Value> {
Ok(Value::Boolean(true))
}
/// function to resolve CMCD_KEY_BR | CMCD_KEY_BL | CMCD_KEY_D | CMCD_KEY_DL | CMCD_KEY_MTP | CMCD_KEY_RTP | CMCD_KEY_TB
fn br_tb(s: &str, k: &str, v: Option<&str>) -> Result<VrlValue> {
fn br_tb(s: &str, k: &str, v: Option<&str>) -> Result<Value> {
let v = v.context(CmcdMissingValueSnafu { k, s })?;
let val: i64 = v
.parse()
.context(FailedToParseIntKeySnafu { key: k, value: v })?;
Ok(VrlValue::Integer(val))
Ok(Value::Int64(val))
}
/// function to resolve CMCD_KEY_CID | CMCD_KEY_NRR | CMCD_KEY_OT | CMCD_KEY_SF | CMCD_KEY_SID | CMCD_KEY_V
fn cid_v(s: &str, k: &str, v: Option<&str>) -> Result<VrlValue> {
fn cid_v(s: &str, k: &str, v: Option<&str>) -> Result<Value> {
let v = v.context(CmcdMissingValueSnafu { k, s })?;
Ok(VrlValue::Bytes(Bytes::from(v.to_string())))
Ok(Value::String(v.to_string()))
}
/// function to resolve CMCD_KEY_NOR
fn nor(s: &str, k: &str, v: Option<&str>) -> Result<VrlValue> {
fn nor(s: &str, k: &str, v: Option<&str>) -> Result<Value> {
let v = v.context(CmcdMissingValueSnafu { k, s })?;
let val = match decode(v) {
Ok(val) => val.to_string(),
Err(_) => v.to_string(),
};
Ok(VrlValue::Bytes(Bytes::from(val)))
Ok(Value::String(val))
}
/// function to resolve CMCD_KEY_PR
fn pr(s: &str, k: &str, v: Option<&str>) -> Result<VrlValue> {
fn pr(s: &str, k: &str, v: Option<&str>) -> Result<Value> {
let v = v.context(CmcdMissingValueSnafu { k, s })?;
let val: f64 = v
.parse()
.context(FailedToParseFloatKeySnafu { key: k, value: v })?;
let val = NotNan::new(val).context(FloatIsNanSnafu)?;
Ok(VrlValue::Float(val))
Ok(Value::Float64(val))
}
/// Common Media Client Data Specification:
@@ -159,11 +156,11 @@ pub struct CmcdProcessor {
}
impl CmcdProcessor {
fn generate_key(prefix: &str, key: &str) -> KeyString {
KeyString::from(format!("{}_{}", prefix, key))
fn generate_key(prefix: &str, key: &str) -> String {
format!("{}_{}", prefix, key)
}
fn parse(&self, name: &str, value: &str) -> Result<BTreeMap<KeyString, VrlValue>> {
fn parse(&self, name: &str, value: &str) -> Result<BTreeMap<String, Value>> {
let mut working_set = BTreeMap::new();
let parts = value.split(',');
@@ -253,18 +250,16 @@ impl Processor for CmcdProcessor {
self.ignore_missing
}
fn exec_mut(&self, mut val: VrlValue) -> Result<VrlValue> {
fn exec_mut(&self, mut val: Value) -> Result<Value> {
for field in self.fields.iter() {
let name = field.input_field();
let val = val.as_object_mut().context(ValueMustBeMapSnafu)?;
match val.get(name) {
Some(VrlValue::Bytes(s)) => {
let s = String::from_utf8_lossy(s);
let results = self.parse(field.target_or_input_field(), &s)?;
val.extend(results);
match val.get(name) {
Some(Value::String(s)) => {
let results = self.parse(field.target_or_input_field(), s)?;
val.extend(results.into())?;
}
Some(VrlValue::Null) | None => {
Some(Value::Null) | None => {
if !self.ignore_missing {
return ProcessorMissingFieldSnafu {
processor: self.kind().to_string(),
@@ -293,6 +288,7 @@ mod tests {
use super::*;
use crate::etl::field::{Field, Fields};
use crate::etl::value::Value;
#[test]
fn test_cmcd() {
@@ -301,23 +297,23 @@ mod tests {
"sid%3D%226e2fb550-c457-11e9-bb97-0800200c9a66%22",
vec![(
"prefix_sid",
VrlValue::Bytes(Bytes::from("\"6e2fb550-c457-11e9-bb97-0800200c9a66\"")),
Value::String("\"6e2fb550-c457-11e9-bb97-0800200c9a66\"".into()),
)],
),
(
"br%3D3200%2Cbs%2Cd%3D4004%2Cmtp%3D25400%2Cot%3Dv%2Crtp%3D15000%2Csid%3D%226e2fb550-c457-11e9-bb97-0800200c9a66%22%2Ctb%3D6000",
vec![
("prefix_bs", VrlValue::Boolean(true)),
("prefix_ot", VrlValue::Bytes(Bytes::from("v"))),
("prefix_rtp", VrlValue::Integer(15000)),
("prefix_br", VrlValue::Integer(3200)),
("prefix_tb", VrlValue::Integer(6000)),
("prefix_d", VrlValue::Integer(4004)),
("prefix_bs", Value::Boolean(true)),
("prefix_ot", Value::String("v".into())),
("prefix_rtp", Value::Int64(15000)),
("prefix_br", Value::Int64(3200)),
("prefix_tb", Value::Int64(6000)),
("prefix_d", Value::Int64(4004)),
(
"prefix_sid",
VrlValue::Bytes(Bytes::from("\"6e2fb550-c457-11e9-bb97-0800200c9a66\"")),
Value::String("\"6e2fb550-c457-11e9-bb97-0800200c9a66\"".into()),
),
("prefix_mtp", VrlValue::Integer(25400)),
("prefix_mtp", Value::Int64(25400)),
],
),
(
@@ -326,16 +322,16 @@ mod tests {
vec![
(
"prefix_sid",
VrlValue::Bytes(Bytes::from("\"6e2fb550-c457-11e9-bb97-0800200c9a66\"")),
Value::String("\"6e2fb550-c457-11e9-bb97-0800200c9a66\"".into()),
),
("prefix_rtp", VrlValue::Integer(15000)),
("prefix_rtp", Value::Int64(15000)),
],
),
(
"bs%2Csu",
vec![
("prefix_su", VrlValue::Boolean(true)),
("prefix_bs", VrlValue::Boolean(true)),
("prefix_su", Value::Boolean(true)),
("prefix_bs", Value::Boolean(true)),
],
),
(
@@ -350,7 +346,7 @@ mod tests {
// "prefix_com.examplemyStringKey",
// Value::String("\"myStringValue\"".into()),
// ),
("prefix_d", VrlValue::Integer(4004)),
("prefix_d", Value::Int64(4004)),
],
),
(
@@ -358,11 +354,11 @@ mod tests {
vec![
(
"prefix_sid",
VrlValue::Bytes(Bytes::from("\"6e2fb550-c457-11e9-bb97-0800200c9a66\"")),
Value::String("\"6e2fb550-c457-11e9-bb97-0800200c9a66\"".into()),
),
(
"prefix_nor",
VrlValue::Bytes(Bytes::from("\"../300kbps/segment35.m4v\"")),
Value::String("\"../300kbps/segment35.m4v\"".into()),
),
],
@@ -370,56 +366,56 @@ mod tests {
(
"nrr%3D%2212323-48763%22%2Csid%3D%226e2fb550-c457-11e9-bb97-0800200c9a66%22",
vec![
("prefix_nrr", VrlValue::Bytes(Bytes::from("\"12323-48763\""))),
("prefix_nrr", Value::String("\"12323-48763\"".into())),
(
"prefix_sid",
VrlValue::Bytes(Bytes::from("\"6e2fb550-c457-11e9-bb97-0800200c9a66\"")),
Value::String("\"6e2fb550-c457-11e9-bb97-0800200c9a66\"".into()),
),
],
),
(
"nor%3D%22..%252F300kbps%252Ftrack.m4v%22%2Cnrr%3D%2212323-48763%22%2Csid%3D%226e2fb550-c457-11e9-bb97-0800200c9a66%22",
vec![
("prefix_nrr", VrlValue::Bytes(Bytes::from("\"12323-48763\""))),
("prefix_nrr", Value::String("\"12323-48763\"".into())),
(
"prefix_sid",
VrlValue::Bytes(Bytes::from("\"6e2fb550-c457-11e9-bb97-0800200c9a66\"")),
Value::String("\"6e2fb550-c457-11e9-bb97-0800200c9a66\"".into()),
),
(
"prefix_nor",
VrlValue::Bytes(Bytes::from("\"../300kbps/track.m4v\"")),
Value::String("\"../300kbps/track.m4v\"".into()),
),
],
),
(
"bl%3D21300%2Cbr%3D3200%2Cbs%2Ccid%3D%22faec5fc2-ac30-11eabb37-0242ac130002%22%2Cd%3D4004%2Cdl%3D18500%2Cmtp%3D48100%2Cnor%3D%22..%252F300kbps%252Ftrack.m4v%22%2Cnrr%3D%2212323-48763%22%2Cot%3Dv%2Cpr%3D1.08%2Crtp%3D12000%2Csf%3Dd%2Csid%3D%226e2fb550-c457-11e9-bb97-0800200c9a66%22%2Cst%3Dv%2Csu%2Ctb%3D6000",
vec![
("prefix_bl", VrlValue::Integer(21300)),
("prefix_bs", VrlValue::Boolean(true)),
("prefix_st", VrlValue::Bytes(Bytes::from("v"))),
("prefix_ot", VrlValue::Bytes(Bytes::from("v"))),
("prefix_bl", Value::Int64(21300)),
("prefix_bs", Value::Boolean(true)),
("prefix_st", Value::String("v".into())),
("prefix_ot", Value::String("v".into())),
(
"prefix_sid",
VrlValue::Bytes(Bytes::from("\"6e2fb550-c457-11e9-bb97-0800200c9a66\"")),
Value::String("\"6e2fb550-c457-11e9-bb97-0800200c9a66\"".into()),
),
("prefix_tb", VrlValue::Integer(6000)),
("prefix_d", VrlValue::Integer(4004)),
("prefix_tb", Value::Int64(6000)),
("prefix_d", Value::Int64(4004)),
(
"prefix_cid",
VrlValue::Bytes(Bytes::from("\"faec5fc2-ac30-11eabb37-0242ac130002\"")),
Value::String("\"faec5fc2-ac30-11eabb37-0242ac130002\"".into()),
),
("prefix_mtp", VrlValue::Integer(48100)),
("prefix_rtp", VrlValue::Integer(12000)),
("prefix_mtp", Value::Int64(48100)),
("prefix_rtp", Value::Int64(12000)),
(
"prefix_nor",
VrlValue::Bytes(Bytes::from("\"../300kbps/track.m4v\"")),
Value::String("\"../300kbps/track.m4v\"".into()),
),
("prefix_sf", VrlValue::Bytes(Bytes::from("d"))),
("prefix_br", VrlValue::Integer(3200)),
("prefix_nrr", VrlValue::Bytes(Bytes::from("\"12323-48763\""))),
("prefix_pr", VrlValue::Float(NotNan::new(1.08).unwrap())),
("prefix_su", VrlValue::Boolean(true)),
("prefix_dl", VrlValue::Integer(18500)),
("prefix_sf", Value::String("d".into())),
("prefix_br", Value::Int64(3200)),
("prefix_nrr", Value::String("\"12323-48763\"".into())),
("prefix_pr", Value::Float64(1.08)),
("prefix_su", Value::Boolean(true)),
("prefix_dl", Value::Int64(18500)),
],
),
];
@@ -436,8 +432,8 @@ mod tests {
let expected = vec
.into_iter()
.map(|(k, v)| (KeyString::from(k.to_string()), v))
.collect::<BTreeMap<KeyString, VrlValue>>();
.map(|(k, v)| (k.to_string(), v))
.collect::<BTreeMap<String, Value>>();
let actual = processor.parse("prefix", &decoded).unwrap();
assert_eq!(actual, expected);
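For reference, a CMCD payload is a comma-separated list of key=value pairs where a bare key stands for a boolean flag, and each output key gets the field name as a prefix. A rough stand-alone sketch of that split, keeping every value as a string rather than coercing to the typed Int64/Float64/Boolean variants the processor uses:

use std::collections::BTreeMap;

// Hypothetical, simplified CMCD-style parser: "k=v" pairs and bare flags.
fn parse_cmcd(prefix: &str, payload: &str) -> BTreeMap<String, String> {
    payload
        .split(',')
        .filter(|part| !part.is_empty())
        .map(|part| match part.split_once('=') {
            Some((k, v)) => (format!("{prefix}_{k}"), v.to_string()),
            None => (format!("{prefix}_{part}"), "true".to_string()),
        })
        .collect()
}

fn main() {
    let parsed = parse_cmcd("prefix", "br=3200,bs,ot=v");
    assert_eq!(parsed.get("prefix_br").map(String::as_str), Some("3200"));
    assert_eq!(parsed.get("prefix_bs").map(String::as_str), Some("true"));
    assert_eq!(parsed.get("prefix_ot").map(String::as_str), Some("v"));
}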


@@ -20,19 +20,17 @@ use csv::{ReaderBuilder, Trim};
use itertools::EitherOrBoth::{Both, Left, Right};
use itertools::Itertools;
use snafu::{OptionExt, ResultExt};
use vrl::prelude::Bytes;
use vrl::value::{KeyString, Value as VrlValue};
use crate::error::{
CsvNoRecordSnafu, CsvQuoteNameSnafu, CsvReadSnafu, CsvSeparatorNameSnafu, Error,
KeyMustBeStringSnafu, ProcessorExpectStringSnafu, ProcessorMissingFieldSnafu, Result,
ValueMustBeMapSnafu,
};
use crate::etl::field::Fields;
use crate::etl::processor::{
yaml_bool, yaml_new_field, yaml_new_fields, yaml_string, Processor, FIELDS_NAME, FIELD_NAME,
IGNORE_MISSING_NAME,
};
use crate::etl::value::Value;
pub(crate) const PROCESSOR_CSV: &str = "csv";
@@ -62,8 +60,8 @@ pub struct CsvProcessor {
impl CsvProcessor {
// process the csv format string to a map with target_fields as keys
fn process(&self, val: &[u8]) -> Result<BTreeMap<KeyString, VrlValue>> {
let mut reader = self.reader.from_reader(val);
fn process(&self, val: &str) -> Result<BTreeMap<String, Value>> {
let mut reader = self.reader.from_reader(val.as_bytes());
if let Some(result) = reader.records().next() {
let record: csv::StringRecord = result.context(CsvReadSnafu)?;
@@ -73,18 +71,17 @@ impl CsvProcessor {
.iter()
.zip_longest(record.iter())
.filter_map(|zipped| match zipped {
Both(target_field, val) => Some((
KeyString::from(target_field.clone()),
VrlValue::Bytes(Bytes::from(val.to_string())),
)),
Both(target_field, val) => {
Some((target_field.clone(), Value::String(val.into())))
}
// if target fields are more than extracted fields, fill the rest with empty value
Left(target_field) => {
let value = self
.empty_value
.as_ref()
.map(|s| VrlValue::Bytes(Bytes::from(s.clone())))
.unwrap_or(VrlValue::Null);
Some((KeyString::from(target_field.clone()), value))
.map(|s| Value::String(s.clone()))
.unwrap_or(Value::Null);
Some((target_field.clone(), value))
}
// if extracted fields are more than target fields, ignore the rest
Right(_) => None,
@@ -193,18 +190,16 @@ impl Processor for CsvProcessor {
self.ignore_missing
}
fn exec_mut(&self, mut val: VrlValue) -> Result<VrlValue> {
fn exec_mut(&self, mut val: Value) -> Result<Value> {
for field in self.fields.iter() {
let name = field.input_field();
let val = val.as_object_mut().context(ValueMustBeMapSnafu)?;
match val.get(name) {
Some(VrlValue::Bytes(v)) => {
Some(Value::String(v)) => {
let results = self.process(v)?;
val.extend(results);
val.extend(results.into())?;
}
Some(VrlValue::Null) | None => {
Some(Value::Null) | None => {
if !self.ignore_missing {
return ProcessorMissingFieldSnafu {
processor: self.kind().to_string(),
@@ -243,11 +238,11 @@ mod tests {
..Default::default()
};
let result = processor.process(b"1,2").unwrap();
let result = processor.process("1,2").unwrap();
let values: BTreeMap<KeyString, VrlValue> = [
(KeyString::from("a"), VrlValue::Bytes(Bytes::from("1"))),
(KeyString::from("b"), VrlValue::Bytes(Bytes::from("2"))),
let values: BTreeMap<String, Value> = [
("a".into(), Value::String("1".into())),
("b".into(), Value::String("2".into())),
]
.into_iter()
.collect();
@@ -269,12 +264,12 @@ mod tests {
..Default::default()
};
let result = processor.process(b"1,2").unwrap();
let result = processor.process("1,2").unwrap();
let values: BTreeMap<KeyString, VrlValue> = [
(KeyString::from("a"), VrlValue::Bytes(Bytes::from("1"))),
(KeyString::from("b"), VrlValue::Bytes(Bytes::from("2"))),
(KeyString::from("c"), VrlValue::Null),
let values: BTreeMap<String, Value> = [
("a".into(), Value::String("1".into())),
("b".into(), Value::String("2".into())),
("c".into(), Value::Null),
]
.into_iter()
.collect();
@@ -294,15 +289,12 @@ mod tests {
..Default::default()
};
let result = processor.process(b"1,2").unwrap();
let result = processor.process("1,2").unwrap();
let values: BTreeMap<KeyString, VrlValue> = [
(KeyString::from("a"), VrlValue::Bytes(Bytes::from("1"))),
(KeyString::from("b"), VrlValue::Bytes(Bytes::from("2"))),
(
KeyString::from("c"),
VrlValue::Bytes(Bytes::from("default")),
),
let values: BTreeMap<String, Value> = [
("a".into(), Value::String("1".into())),
("b".into(), Value::String("2".into())),
("c".into(), Value::String("default".into())),
]
.into_iter()
.collect();
@@ -323,11 +315,11 @@ mod tests {
..Default::default()
};
let result = processor.process(b"1,2").unwrap();
let result = processor.process("1,2").unwrap();
let values: BTreeMap<KeyString, VrlValue> = [
(KeyString::from("a"), VrlValue::Bytes(Bytes::from("1"))),
(KeyString::from("b"), VrlValue::Bytes(Bytes::from("2"))),
let values: BTreeMap<String, Value> = [
("a".into(), Value::String("1".into())),
("b".into(), Value::String("2".into())),
]
.into_iter()
.collect();
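The CSV processor pairs one parsed record with the configured target fields, filling any missing trailing columns with the empty value and dropping extra columns. A sketch of that pairing using the csv crate (a toy helper, not the processor's API; the empty_value default is an assumption):

use std::collections::BTreeMap;

use csv::ReaderBuilder;

// Map one CSV line onto target field names, padding missing columns.
fn csv_to_map(line: &str, targets: &[&str], empty_value: &str) -> BTreeMap<String, String> {
    let mut reader = ReaderBuilder::new()
        .has_headers(false)
        .from_reader(line.as_bytes());
    let record = match reader.records().next() {
        Some(Ok(rec)) => rec,
        _ => csv::StringRecord::new(),
    };
    targets
        .iter()
        .enumerate()
        .map(|(i, target)| {
            (
                target.to_string(),
                record.get(i).unwrap_or(empty_value).to_string(),
            )
        })
        .collect()
}

fn main() {
    let map = csv_to_map("1,2", &["a", "b", "c"], "default");
    assert_eq!(map.get("a").map(String::as_str), Some("1"));
    assert_eq!(map.get("c").map(String::as_str), Some("default"));
}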


@@ -14,22 +14,22 @@
use std::sync::Arc;
use chrono::{DateTime, NaiveDateTime, Utc};
use chrono::{DateTime, NaiveDateTime};
use chrono_tz::Tz;
use lazy_static::lazy_static;
use snafu::{OptionExt, ResultExt};
use vrl::value::{KeyString, Value as VrlValue};
use crate::error::{
DateFailedToGetLocalTimezoneSnafu, DateParseSnafu, DateParseTimezoneSnafu, Error,
KeyMustBeStringSnafu, ProcessorExpectStringSnafu, ProcessorFailedToParseStringSnafu,
ProcessorMissingFieldSnafu, Result, ValueMustBeMapSnafu,
DateFailedToGetLocalTimezoneSnafu, DateFailedToGetTimestampSnafu, DateParseSnafu,
DateParseTimezoneSnafu, Error, KeyMustBeStringSnafu, ProcessorExpectStringSnafu,
ProcessorFailedToParseStringSnafu, ProcessorMissingFieldSnafu, Result,
};
use crate::etl::field::Fields;
use crate::etl::processor::{
yaml_bool, yaml_new_field, yaml_new_fields, yaml_string, yaml_strings, Processor, FIELDS_NAME,
FIELD_NAME, IGNORE_MISSING_NAME,
};
use crate::etl::value::{Timestamp, Value};
pub(crate) const PROCESSOR_DATE: &str = "date";
@@ -162,7 +162,7 @@ pub struct DateProcessor {
}
impl DateProcessor {
fn parse(&self, val: &str) -> Result<DateTime<Utc>> {
fn parse(&self, val: &str) -> Result<Timestamp> {
let mut tz = Tz::UTC;
if let Some(timezone) = &self.timezone {
tz = timezone.parse::<Tz>().context(DateParseTimezoneSnafu {
@@ -171,8 +171,8 @@ impl DateProcessor {
}
for fmt in self.formats.iter() {
if let Ok(utc_ts) = try_parse(val, fmt, tz) {
return Ok(utc_ts);
if let Ok(ns) = try_parse(val, fmt, tz) {
return Ok(Timestamp::Nanosecond(ns));
}
}
@@ -193,19 +193,16 @@ impl Processor for DateProcessor {
self.ignore_missing
}
fn exec_mut(&self, mut val: VrlValue) -> Result<VrlValue> {
fn exec_mut(&self, mut val: Value) -> Result<Value> {
for field in self.fields.iter() {
let index = field.input_field();
let val = val.as_object_mut().context(ValueMustBeMapSnafu)?;
match val.get(index) {
Some(VrlValue::Bytes(s)) => {
let timestamp = self.parse(String::from_utf8_lossy(s).as_ref())?;
Some(Value::String(s)) => {
let timestamp = self.parse(s)?;
let output_key = field.target_or_input_field();
val.insert(KeyString::from(output_key), VrlValue::Timestamp(timestamp));
val.insert(output_key.to_string(), Value::Timestamp(timestamp))?;
}
Some(VrlValue::Null) | None => {
Some(Value::Null) | None => {
if !self.ignore_missing {
return ProcessorMissingFieldSnafu {
processor: self.kind().to_string(),
@@ -227,19 +224,21 @@ impl Processor for DateProcessor {
}
}
// parse the datetime with timezone info
// if failed, try to parse using naive date time and add tz info
// finally convert the datetime to utc
fn try_parse(val: &str, fmt: &str, tz: Tz) -> Result<DateTime<Utc>> {
/// try to parse val with timezone first, if failed, parse without timezone
fn try_parse(val: &str, fmt: &str, tz: Tz) -> Result<i64> {
if let Ok(dt) = DateTime::parse_from_str(val, fmt) {
Ok(dt.to_utc())
Ok(dt
.timestamp_nanos_opt()
.context(DateFailedToGetTimestampSnafu)?)
} else {
let dt = NaiveDateTime::parse_from_str(val, fmt)
.context(DateParseSnafu { value: val })?
.and_local_timezone(tz)
.single()
.context(DateFailedToGetLocalTimezoneSnafu)?;
Ok(dt.to_utc())
Ok(dt
.timestamp_nanos_opt()
.context(DateFailedToGetTimestampSnafu)?)
}
}
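The fallback order is: parse the input as a date-time that carries its own offset; if that fails, parse it as a naive date-time and attach the configured timezone; either way the result is reduced to epoch nanoseconds. A minimal sketch of that logic, assuming a recent chrono (timestamp_nanos_opt) and chrono-tz, with Option standing in for the error handling above:

use chrono::{DateTime, NaiveDateTime};
use chrono_tz::Tz;

// Try the offset-aware parse first, then fall back to naive time in `tz`.
fn try_parse(val: &str, fmt: &str, tz: Tz) -> Option<i64> {
    if let Ok(dt) = DateTime::parse_from_str(val, fmt) {
        // The input carried its own offset: convert straight to epoch nanoseconds.
        return dt.timestamp_nanos_opt();
    }
    // Otherwise treat it as a naive local time in `tz`, then take nanoseconds.
    NaiveDateTime::parse_from_str(val, fmt)
        .ok()?
        .and_local_timezone(tz)
        .single()?
        .timestamp_nanos_opt()
}

fn main() {
    let ns = try_parse("2024-05-06 07:08:09", "%Y-%m-%d %H:%M:%S", Tz::UTC);
    assert!(ns.is_some());
}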


@@ -21,17 +21,15 @@
use once_cell::sync::Lazy;
use regex::Regex;
use snafu::OptionExt;
use vrl::prelude::Bytes;
use vrl::value::{KeyString, Value as VrlValue};
use crate::error::{
Error, KeyMustBeStringSnafu, ProcessorExpectStringSnafu, ProcessorMissingFieldSnafu, Result,
ValueMustBeMapSnafu,
};
use crate::etl::field::Fields;
use crate::etl::processor::{
yaml_bool, yaml_new_field, yaml_new_fields, FIELDS_NAME, FIELD_NAME, IGNORE_MISSING_NAME,
};
use crate::etl::value::Value;
pub(crate) const PROCESSOR_DECOLORIZE: &str = "decolorize";
@@ -45,15 +43,13 @@ pub struct DecolorizeProcessor {
}
impl DecolorizeProcessor {
fn process_string(&self, val: &str) -> Result<VrlValue> {
Ok(VrlValue::Bytes(Bytes::from(
RE.replace_all(val, "").to_string(),
)))
fn process_string(&self, val: &str) -> Result<Value> {
Ok(Value::String(RE.replace_all(val, "").into_owned()))
}
fn process(&self, val: &VrlValue) -> Result<VrlValue> {
fn process(&self, val: &Value) -> Result<Value> {
match val {
VrlValue::Bytes(val) => self.process_string(String::from_utf8_lossy(val).as_ref()),
Value::String(val) => self.process_string(val),
_ => ProcessorExpectStringSnafu {
processor: PROCESSOR_DECOLORIZE,
v: val.clone(),
@@ -105,12 +101,11 @@ impl crate::etl::processor::Processor for DecolorizeProcessor {
self.ignore_missing
}
fn exec_mut(&self, mut val: VrlValue) -> Result<VrlValue> {
fn exec_mut(&self, mut val: Value) -> Result<Value> {
for field in self.fields.iter() {
let index = field.input_field();
let val = val.as_object_mut().context(ValueMustBeMapSnafu)?;
match val.get(index) {
Some(VrlValue::Null) | None => {
Some(Value::Null) | None => {
if !self.ignore_missing {
return ProcessorMissingFieldSnafu {
processor: self.kind(),
@@ -122,7 +117,7 @@ impl crate::etl::processor::Processor for DecolorizeProcessor {
Some(v) => {
let result = self.process(v)?;
let output_index = field.target_or_input_field();
val.insert(KeyString::from(output_index), result);
val.insert(output_index.to_string(), result)?;
}
}
}
@@ -141,19 +136,16 @@ mod tests {
ignore_missing: false,
};
let val = VrlValue::Bytes(Bytes::from("\x1b[32mGreen\x1b[0m".to_string()));
let val = Value::String("\x1b[32mGreen\x1b[0m".to_string());
let result = processor.process(&val).unwrap();
assert_eq!(result, VrlValue::Bytes(Bytes::from("Green".to_string())));
assert_eq!(result, Value::String("Green".to_string()));
let val = VrlValue::Bytes(Bytes::from("Plain text".to_string()));
let val = Value::String("Plain text".to_string());
let result = processor.process(&val).unwrap();
assert_eq!(
result,
VrlValue::Bytes(Bytes::from("Plain text".to_string()))
);
assert_eq!(result, Value::String("Plain text".to_string()));
let val = VrlValue::Bytes(Bytes::from("\x1b[46mfoo\x1b[0m bar".to_string()));
let val = Value::String("\x1b[46mfoo\x1b[0m bar".to_string());
let result = processor.process(&val).unwrap();
assert_eq!(result, VrlValue::Bytes(Bytes::from("foo bar".to_string())));
assert_eq!(result, Value::String("foo bar".to_string()));
}
}
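Decolorizing is a regex replacement that strips ANSI color escape sequences and leaves the rest of the string untouched. A tiny stand-alone sketch with the regex crate; the exact pattern below is an assumption, the processor's preset RE may differ:

use regex::Regex;

// Strip ANSI SGR color sequences (assumed pattern) from the input.
fn decolorize(input: &str) -> String {
    let re = Regex::new(r"\x1b\[[0-9;]*m").expect("static pattern");
    re.replace_all(input, "").into_owned()
}

fn main() {
    assert_eq!(decolorize("\x1b[32mGreen\x1b[0m"), "Green");
    assert_eq!(decolorize("\x1b[46mfoo\x1b[0m bar"), "foo bar");
    assert_eq!(decolorize("Plain text"), "Plain text");
}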


@@ -23,17 +23,16 @@ use std::borrow::Cow;
use regex::Regex;
use snafu::OptionExt;
use vrl::prelude::Bytes;
use vrl::value::{KeyString, Value as VrlValue};
use crate::error::{
DigestPatternInvalidSnafu, Error, KeyMustBeStringSnafu, ProcessorExpectStringSnafu,
ProcessorMissingFieldSnafu, Result, ValueMustBeMapSnafu,
ProcessorMissingFieldSnafu, Result,
};
use crate::etl::field::Fields;
use crate::etl::processor::{
yaml_bool, yaml_new_field, yaml_new_fields, FIELDS_NAME, FIELD_NAME, IGNORE_MISSING_NAME,
};
use crate::etl::value::Value;
pub(crate) const PROCESSOR_DIGEST: &str = "digest";
@@ -101,7 +100,7 @@ impl DigestProcessor {
re.replace_all(val, "").to_string()
}
fn process_string(&self, val: &str) -> Result<VrlValue> {
fn process_string(&self, val: &str) -> Result<Value> {
let mut input = Cow::from(val);
for pattern in &self.patterns {
if let Cow::Owned(new_string) = pattern.replace_all(&input, "") {
@@ -109,12 +108,12 @@ impl DigestProcessor {
}
}
Ok(VrlValue::Bytes(Bytes::from(input.to_string())))
Ok(Value::String(input.into_owned()))
}
fn process(&self, val: &VrlValue) -> Result<VrlValue> {
fn process(&self, val: &Value) -> Result<Value> {
match val {
VrlValue::Bytes(val) => self.process_string(String::from_utf8_lossy(val).as_ref()),
Value::String(val) => self.process_string(val),
_ => ProcessorExpectStringSnafu {
processor: PROCESSOR_DIGEST,
v: val.clone(),
@@ -201,12 +200,11 @@ impl crate::etl::processor::Processor for DigestProcessor {
self.ignore_missing
}
fn exec_mut(&self, mut val: VrlValue) -> Result<VrlValue> {
fn exec_mut(&self, mut val: Value) -> Result<Value> {
for field in self.fields.iter() {
let index = field.input_field();
let val = val.as_object_mut().context(ValueMustBeMapSnafu)?;
match val.get(index) {
Some(VrlValue::Null) | None => {
Some(Value::Null) | None => {
if !self.ignore_missing {
return ProcessorMissingFieldSnafu {
processor: self.kind(),
@@ -218,7 +216,7 @@ impl crate::etl::processor::Processor for DigestProcessor {
Some(v) => {
let result = self.process(v)?;
let output_index = field.target_or_input_field();
val.insert(KeyString::from(output_index), result);
val.insert(output_index.to_string(), result)?;
}
}
}
@@ -239,31 +237,24 @@ mod tests {
patterns: vec![PresetPattern::Ip.regex()],
};
let input = VrlValue::Bytes(Bytes::from("192.168.1.1".to_string()));
let input = Value::String("192.168.1.1".to_string());
let result = processor.process(&input).unwrap();
assert_eq!(result, VrlValue::Bytes(Bytes::from("".to_string())));
let input = VrlValue::Bytes(Bytes::from("192.168.1.1:8080".to_string()));
assert_eq!(result, Value::String("".to_string()));
let input = Value::String("192.168.1.1:8080".to_string());
let result = processor.process(&input).unwrap();
assert_eq!(result, VrlValue::Bytes(Bytes::from("".to_string())));
assert_eq!(result, Value::String("".to_string()));
let input = VrlValue::Bytes(Bytes::from(
"[2001:0db8:85a3:0000:0000:8a2e:0370:7334]".to_string(),
));
let input = Value::String("[2001:0db8:85a3:0000:0000:8a2e:0370:7334]".to_string());
let result = processor.process(&input).unwrap();
assert_eq!(result, VrlValue::Bytes(Bytes::from("".to_string())));
assert_eq!(result, Value::String("".to_string()));
let input = VrlValue::Bytes(Bytes::from(
"[2001:0db8:85a3:0000:0000:8a2e:0370:7334]:8080".to_string(),
));
let input = Value::String("[2001:0db8:85a3:0000:0000:8a2e:0370:7334]:8080".to_string());
let result = processor.process(&input).unwrap();
assert_eq!(result, VrlValue::Bytes(Bytes::from("".to_string())));
assert_eq!(result, Value::String("".to_string()));
let input = VrlValue::Bytes(Bytes::from("not an ip".to_string()));
let input = Value::String("not an ip".to_string());
let result = processor.process(&input).unwrap();
assert_eq!(
result,
VrlValue::Bytes(Bytes::from("not an ip".to_string()))
);
assert_eq!(result, Value::String("not an ip".to_string()));
}
#[test]
@@ -274,40 +265,29 @@ mod tests {
patterns: vec![PresetPattern::Uuid.regex()],
};
// UUID v4
let input = VrlValue::Bytes(Bytes::from(
"123e4567-e89b-12d3-a456-426614174000".to_string(),
));
let input = Value::String("123e4567-e89b-12d3-a456-426614174000".to_string());
let result = processor.process(&input).unwrap();
assert_eq!(result, VrlValue::Bytes(Bytes::from("".to_string())));
assert_eq!(result, Value::String("".to_string()));
// UUID v1
let input = VrlValue::Bytes(Bytes::from(
"6ba7b810-9dad-11d1-80b4-00c04fd430c8".to_string(),
));
let input = Value::String("6ba7b810-9dad-11d1-80b4-00c04fd430c8".to_string());
let result = processor.process(&input).unwrap();
assert_eq!(result, VrlValue::Bytes(Bytes::from("".to_string())));
assert_eq!(result, Value::String("".to_string()));
// UUID v5
let input = VrlValue::Bytes(Bytes::from(
"886313e1-3b8a-5372-9b90-0c9aee199e5d".to_string(),
));
let input = Value::String("886313e1-3b8a-5372-9b90-0c9aee199e5d".to_string());
let result = processor.process(&input).unwrap();
assert_eq!(result, VrlValue::Bytes(Bytes::from("".to_string())));
assert_eq!(result, Value::String("".to_string()));
// UUID with uppercase letters
let input = VrlValue::Bytes(Bytes::from(
"A987FBC9-4BED-3078-CF07-9141BA07C9F3".to_string(),
));
let input = Value::String("A987FBC9-4BED-3078-CF07-9141BA07C9F3".to_string());
let result = processor.process(&input).unwrap();
assert_eq!(result, VrlValue::Bytes(Bytes::from("".to_string())));
assert_eq!(result, Value::String("".to_string()));
// Negative case
let input = VrlValue::Bytes(Bytes::from("not a uuid".to_string()));
let input = Value::String("not a uuid".to_string());
let result = processor.process(&input).unwrap();
assert_eq!(
result,
VrlValue::Bytes(Bytes::from("not a uuid".to_string()))
);
assert_eq!(result, Value::String("not a uuid".to_string()));
}
#[test]
@@ -319,48 +299,45 @@ mod tests {
};
// Basic brackets
let input = VrlValue::Bytes(Bytes::from("[content]".to_string()));
let input = Value::String("[content]".to_string());
let result = processor.process(&input).unwrap();
assert_eq!(result, VrlValue::Bytes(Bytes::from("".to_string())));
assert_eq!(result, Value::String("".to_string()));
let input = VrlValue::Bytes(Bytes::from("(content)".to_string()));
let input = Value::String("(content)".to_string());
let result = processor.process(&input).unwrap();
assert_eq!(result, VrlValue::Bytes(Bytes::from("".to_string())));
assert_eq!(result, Value::String("".to_string()));
// Chinese brackets
let input = VrlValue::Bytes(Bytes::from("「content」".to_string()));
let input = Value::String("「content」".to_string());
let result = processor.process(&input).unwrap();
assert_eq!(result, VrlValue::Bytes(Bytes::from("".to_string())));
assert_eq!(result, Value::String("".to_string()));
let input = VrlValue::Bytes(Bytes::from("『content』".to_string()));
let input = Value::String("『content』".to_string());
let result = processor.process(&input).unwrap();
assert_eq!(result, VrlValue::Bytes(Bytes::from("".to_string())));
assert_eq!(result, Value::String("".to_string()));
let input = VrlValue::Bytes(Bytes::from("【content】".to_string()));
let input = Value::String("【content】".to_string());
let result = processor.process(&input).unwrap();
assert_eq!(result, VrlValue::Bytes(Bytes::from("".to_string())));
assert_eq!(result, Value::String("".to_string()));
// Unmatched/unclosed brackets should not match
let input = VrlValue::Bytes(Bytes::from("[content".to_string()));
let input = Value::String("[content".to_string());
let result = processor.process(&input).unwrap();
assert_eq!(result, VrlValue::Bytes(Bytes::from("[content".to_string())));
assert_eq!(result, Value::String("[content".to_string()));
let input = VrlValue::Bytes(Bytes::from("content]".to_string()));
let input = Value::String("content]".to_string());
let result = processor.process(&input).unwrap();
assert_eq!(result, VrlValue::Bytes(Bytes::from("content]".to_string())));
assert_eq!(result, Value::String("content]".to_string()));
// Bad case
let input = VrlValue::Bytes(Bytes::from("[content}".to_string()));
let input = Value::String("[content}".to_string());
let result = processor.process(&input).unwrap();
assert_eq!(result, VrlValue::Bytes(Bytes::from("".to_string())));
assert_eq!(result, Value::String("".to_string()));
// Negative case
let input = VrlValue::Bytes(Bytes::from("no brackets".to_string()));
let input = Value::String("no brackets".to_string());
let result = processor.process(&input).unwrap();
assert_eq!(
result,
VrlValue::Bytes(Bytes::from("no brackets".to_string()))
);
assert_eq!(result, Value::String("no brackets".to_string()));
}
#[test]
@@ -371,19 +348,16 @@ mod tests {
patterns: vec![PresetPattern::Quoted.regex()],
};
let input = VrlValue::Bytes(Bytes::from("\"quoted content\"".to_string()));
let input = Value::String("\"quoted content\"".to_string());
let result = processor.process(&input).unwrap();
assert_eq!(result, VrlValue::Bytes(Bytes::from("".to_string())));
assert_eq!(result, Value::String("".to_string()));
let input = VrlValue::Bytes(Bytes::from("no quotes".to_string()));
let input = Value::String("no quotes".to_string());
let result = processor.process(&input).unwrap();
assert_eq!(
result,
VrlValue::Bytes(Bytes::from("no quotes".to_string()))
);
let input = VrlValue::Bytes(Bytes::from("".to_string()));
assert_eq!(result, Value::String("no quotes".to_string()));
let input = Value::String("".to_string());
let result = processor.process(&input).unwrap();
assert_eq!(result, VrlValue::Bytes(Bytes::from("".to_string())));
assert_eq!(result, Value::String("".to_string()));
}
#[test]
@@ -394,18 +368,15 @@ mod tests {
patterns: vec![Regex::new(r"\d+").unwrap()],
};
let input = VrlValue::Bytes(Bytes::from("12345".to_string()));
let input = Value::String("12345".to_string());
let result = processor.process(&input).unwrap();
assert_eq!(result, VrlValue::Bytes(Bytes::from("".to_string())));
assert_eq!(result, Value::String("".to_string()));
let input = VrlValue::Bytes(Bytes::from("no digits".to_string()));
let input = Value::String("no digits".to_string());
let result = processor.process(&input).unwrap();
assert_eq!(
result,
VrlValue::Bytes(Bytes::from("no digits".to_string()))
);
let input = VrlValue::Bytes(Bytes::from("".to_string()));
assert_eq!(result, Value::String("no digits".to_string()));
let input = Value::String("".to_string());
let result = processor.process(&input).unwrap();
assert_eq!(result, VrlValue::Bytes(Bytes::from("".to_string())));
assert_eq!(result, Value::String("".to_string()));
}
}


@@ -17,8 +17,6 @@ use std::ops::Deref;
use ahash::{HashMap, HashMapExt, HashSet, HashSetExt};
use itertools::Itertools;
use snafu::OptionExt;
use vrl::prelude::Bytes;
use vrl::value::{KeyString, Value as VrlValue};
use crate::error::{
DissectAppendOrderAlreadySetSnafu, DissectConsecutiveNamesSnafu, DissectEmptyPatternSnafu,
@@ -26,13 +24,13 @@ use crate::error::{
DissectNoMatchingPatternSnafu, DissectOrderOnlyAppendModifierSnafu,
DissectOrderOnlyAppendSnafu, DissectSplitExceedsInputSnafu, DissectSplitNotMatchInputSnafu,
Error, KeyMustBeStringSnafu, ProcessorExpectStringSnafu, ProcessorMissingFieldSnafu, Result,
ValueMustBeMapSnafu,
};
use crate::etl::field::Fields;
use crate::etl::processor::{
yaml_bool, yaml_new_field, yaml_new_fields, yaml_parse_string, yaml_parse_strings, yaml_string,
Processor, FIELDS_NAME, FIELD_NAME, IGNORE_MISSING_NAME, PATTERNS_NAME, PATTERN_NAME,
};
use crate::etl::value::Value;
pub(crate) const PROCESSOR_DISSECT: &str = "dissect";
@@ -423,7 +421,7 @@ impl DissectProcessor {
name: &'a Name,
value: String,
appends: &mut HashMap<&'a String, Vec<(String, u32)>>,
map: &mut Vec<(&'a String, VrlValue)>,
map: &mut Vec<(&'a String, Value)>,
) {
match name.start_modifier {
Some(StartModifier::NamedSkip) => {
@@ -440,16 +438,12 @@ impl DissectProcessor {
// because transform can know the key name
}
None => {
map.push((&name.name, VrlValue::Bytes(Bytes::from(value))));
map.push((&name.name, Value::String(value)));
}
}
}
fn process_pattern(
&self,
chs: &[char],
pattern: &Pattern,
) -> Result<Vec<(KeyString, VrlValue)>> {
fn process_pattern(&self, chs: &[char], pattern: &Pattern) -> Result<Vec<(String, Value)>> {
let mut map = Vec::new();
let mut pos = 0;
@@ -529,17 +523,14 @@ impl DissectProcessor {
for (name, mut values) in appends {
values.sort_by(|a, b| a.1.cmp(&b.1));
let value = values.into_iter().map(|(a, _)| a).join(sep);
map.push((name, VrlValue::Bytes(Bytes::from(value))));
map.push((name, Value::String(value)));
}
}
Ok(map
.into_iter()
.map(|(k, v)| (KeyString::from(k.clone()), v))
.collect())
Ok(map.into_iter().map(|(k, v)| (k.to_string(), v)).collect())
}
fn process(&self, val: &str) -> Result<Vec<(KeyString, VrlValue)>> {
fn process(&self, val: &str) -> Result<Vec<(String, Value)>> {
let chs = val.chars().collect::<Vec<char>>();
for pattern in &self.patterns {
@@ -609,18 +600,17 @@ impl Processor for DissectProcessor {
self.ignore_missing
}
fn exec_mut(&self, mut val: VrlValue) -> Result<VrlValue> {
fn exec_mut(&self, mut val: Value) -> Result<Value> {
for field in self.fields.iter() {
let index = field.input_field();
let val = val.as_object_mut().context(ValueMustBeMapSnafu)?;
match val.get(index) {
Some(VrlValue::Bytes(val_str)) => {
let r = self.process(String::from_utf8_lossy(val_str).as_ref())?;
Some(Value::String(val_str)) => {
let r = self.process(val_str)?;
for (k, v) in r {
val.insert(k, v);
val.insert(k, v)?;
}
}
Some(VrlValue::Null) | None => {
Some(Value::Null) | None => {
if !self.ignore_missing {
return ProcessorMissingFieldSnafu {
processor: self.kind(),
@@ -649,18 +639,17 @@ fn is_valid_char(ch: char) -> bool {
#[cfg(test)]
mod tests {
use ahash::HashMap;
use vrl::prelude::Bytes;
use vrl::value::{KeyString, Value as VrlValue};
use super::{DissectProcessor, EndModifier, Name, Part, StartModifier};
use crate::etl::processor::dissect::Pattern;
use crate::etl::value::Value;
fn assert(pattern_str: &str, input: &str, expected: HashMap<KeyString, VrlValue>) {
fn assert(pattern_str: &str, input: &str, expected: HashMap<String, Value>) {
let chs = input.chars().collect::<Vec<char>>();
let patterns: Vec<Pattern> = vec![pattern_str.parse().unwrap()];
let processor = DissectProcessor::default();
let result: HashMap<KeyString, VrlValue> = processor
let result: HashMap<String, Value> = processor
.process_pattern(&chs, &patterns[0])
.unwrap()
.into_iter()
@@ -1002,13 +991,8 @@ mod tests {
("httpversion", "1.0"),
]
.into_iter()
.map(|(k, v)| {
(
KeyString::from(k.to_string()),
VrlValue::Bytes(Bytes::from(v.to_string())),
)
})
.collect::<HashMap<KeyString, VrlValue>>();
.map(|(k, v)| (k.to_string(), Value::String(v.to_string())))
.collect::<HashMap<String, Value>>();
{
// pattern start with Name
@@ -1048,12 +1032,9 @@ mod tests {
]
.into_iter()
.map(|(pattern, input, expected)| {
let map = expected.into_iter().map(|(k, v)| {
(
KeyString::from(k.to_string()),
VrlValue::Bytes(Bytes::from(v.to_string())),
)
});
let map = expected
.into_iter()
.map(|(k, v)| (k.to_string(), Value::String(v.to_string())));
(pattern, input, map)
});
@@ -1061,7 +1042,7 @@ mod tests {
assert(
pattern_str,
input,
expected.collect::<HashMap<KeyString, VrlValue>>(),
expected.collect::<HashMap<String, Value>>(),
);
}
}
@@ -1082,12 +1063,9 @@ mod tests {
]
.into_iter()
.map(|(pattern, input, expected)| {
let map = expected.into_iter().map(|(k, v)| {
(
KeyString::from(k.to_string()),
VrlValue::Bytes(Bytes::from(v.to_string())),
)
});
let map = expected
.into_iter()
.map(|(k, v)| (k.to_string(), Value::String(v.to_string())));
(pattern, input, map)
});
@@ -1095,7 +1073,7 @@ mod tests {
assert(
pattern_str,
input,
expected.collect::<HashMap<KeyString, VrlValue>>(),
expected.collect::<HashMap<String, Value>>(),
);
}
}
@@ -1112,12 +1090,9 @@ mod tests {
)]
.into_iter()
.map(|(pattern, input, expected)| {
let map = expected.into_iter().map(|(k, v)| {
(
KeyString::from(k.to_string()),
VrlValue::Bytes(Bytes::from(v.to_string())),
)
});
let map = expected
.into_iter()
.map(|(k, v)| (k.to_string(), Value::String(v.to_string())));
(pattern, input, map)
});
@@ -1125,7 +1100,7 @@ mod tests {
assert(
pattern_str,
input,
expected.collect::<HashMap<KeyString, VrlValue>>(),
expected.collect::<HashMap<String, Value>>(),
);
}
}


@@ -12,26 +12,24 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use chrono::{DateTime, Utc};
use common_time::timestamp::TimeUnit;
use snafu::{OptionExt, ResultExt};
use vrl::value::{KeyString, Value as VrlValue};
use crate::error::{
EpochInvalidResolutionSnafu, Error, FailedToParseIntSnafu, InvalidEpochForResolutionSnafu,
KeyMustBeStringSnafu, ProcessorMissingFieldSnafu, ProcessorUnsupportedValueSnafu, Result,
ValueMustBeMapSnafu,
EpochInvalidResolutionSnafu, Error, FailedToParseIntSnafu, KeyMustBeStringSnafu,
ProcessorMissingFieldSnafu, ProcessorUnsupportedValueSnafu, Result,
};
use crate::etl::field::Fields;
use crate::etl::processor::{
yaml_bool, yaml_new_field, yaml_new_fields, yaml_string, Processor, FIELDS_NAME, FIELD_NAME,
IGNORE_MISSING_NAME,
};
use crate::etl::value::{
use crate::etl::value::time::{
MICROSECOND_RESOLUTION, MICRO_RESOLUTION, MILLISECOND_RESOLUTION, MILLI_RESOLUTION,
MS_RESOLUTION, NANOSECOND_RESOLUTION, NANO_RESOLUTION, NS_RESOLUTION, SECOND_RESOLUTION,
SEC_RESOLUTION, S_RESOLUTION, US_RESOLUTION,
};
use crate::etl::value::{Timestamp, Value};
pub(crate) const PROCESSOR_EPOCH: &str = "epoch";
const RESOLUTION_NAME: &str = "resolution";
@@ -45,18 +43,6 @@ pub(crate) enum Resolution {
Nano,
}
impl std::fmt::Display for Resolution {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let text = match self {
Resolution::Second => SECOND_RESOLUTION,
Resolution::Milli => MILLISECOND_RESOLUTION,
Resolution::Micro => MICROSECOND_RESOLUTION,
Resolution::Nano => NANOSECOND_RESOLUTION,
};
write!(f, "{}", text)
}
}
impl TryFrom<&str> for Resolution {
type Error = Error;
@@ -98,36 +84,43 @@ pub struct EpochProcessor {
}
impl EpochProcessor {
fn parse(&self, val: &VrlValue) -> Result<DateTime<Utc>> {
let t: i64 =
match val {
VrlValue::Bytes(bytes) => String::from_utf8_lossy(bytes).parse::<i64>().context(
FailedToParseIntSnafu {
value: val.to_string_lossy(),
},
)?,
VrlValue::Integer(ts) => *ts,
VrlValue::Float(not_nan) => not_nan.into_inner() as i64,
VrlValue::Timestamp(date_time) => return Ok(*date_time),
_ => {
return ProcessorUnsupportedValueSnafu {
processor: PROCESSOR_EPOCH,
val: val.to_string(),
}
.fail();
fn parse(&self, val: &Value) -> Result<Timestamp> {
let t: i64 = match val {
Value::String(s) => s
.parse::<i64>()
.context(FailedToParseIntSnafu { value: s })?,
Value::Int16(i) => *i as i64,
Value::Int32(i) => *i as i64,
Value::Int64(i) => *i,
Value::Uint8(i) => *i as i64,
Value::Uint16(i) => *i as i64,
Value::Uint32(i) => *i as i64,
Value::Uint64(i) => *i as i64,
Value::Float32(f) => *f as i64,
Value::Float64(f) => *f as i64,
Value::Timestamp(t) => match self.resolution {
Resolution::Second => t.timestamp(),
Resolution::Milli => t.timestamp_millis(),
Resolution::Micro => t.timestamp_micros(),
Resolution::Nano => t.timestamp_nanos(),
},
_ => {
return ProcessorUnsupportedValueSnafu {
processor: PROCESSOR_EPOCH,
val: val.to_string(),
}
};
.fail();
}
};
match self.resolution {
Resolution::Second => DateTime::from_timestamp(t, 0),
Resolution::Milli => DateTime::from_timestamp_millis(t),
Resolution::Micro => DateTime::from_timestamp_micros(t),
Resolution::Nano => Some(DateTime::from_timestamp_nanos(t)),
Resolution::Second => Ok(Timestamp::Second(t)),
Resolution::Milli => Ok(Timestamp::Millisecond(t)),
Resolution::Micro => Ok(Timestamp::Microsecond(t)),
Resolution::Nano => Ok(Timestamp::Nanosecond(t)),
}
.context(InvalidEpochForResolutionSnafu {
value: t,
resolution: self.resolution.to_string(),
})
}
}
@@ -181,12 +174,11 @@ impl Processor for EpochProcessor {
self.ignore_missing
}
fn exec_mut(&self, mut val: VrlValue) -> Result<VrlValue> {
fn exec_mut(&self, mut val: Value) -> Result<Value> {
for field in self.fields.iter() {
let index = field.input_field();
let val = val.as_object_mut().context(ValueMustBeMapSnafu)?;
match val.get(index) {
Some(VrlValue::Null) | None => {
Some(Value::Null) | None => {
if !self.ignore_missing {
return ProcessorMissingFieldSnafu {
processor: self.kind(),
@@ -198,10 +190,7 @@ impl Processor for EpochProcessor {
Some(v) => {
let timestamp = self.parse(v)?;
let output_index = field.target_or_input_field();
val.insert(
KeyString::from(output_index.to_string()),
VrlValue::Timestamp(timestamp),
);
val.insert(output_index.to_string(), Value::Timestamp(timestamp))?;
}
}
}
@@ -211,12 +200,8 @@ impl Processor for EpochProcessor {
#[cfg(test)]
mod tests {
use chrono::DateTime;
use ordered_float::NotNan;
use vrl::prelude::Bytes;
use vrl::value::Value as VrlValue;
use super::EpochProcessor;
use crate::etl::value::Value;
#[test]
fn test_parse_epoch() {
@@ -226,15 +211,15 @@ mod tests {
};
let values = [
VrlValue::Bytes(Bytes::from("1573840000")),
VrlValue::Integer(1573840000),
VrlValue::Integer(1573840000),
VrlValue::Float(NotNan::new(1573840000.0).unwrap()),
Value::String("1573840000".into()),
Value::Int32(1573840000),
Value::Uint64(1573840000),
Value::Float32(1573840000.0),
];
for value in values {
let parsed = processor.parse(&value).unwrap();
assert_eq!(parsed, DateTime::from_timestamp(1573840000, 0).unwrap());
assert_eq!(parsed, super::Timestamp::Second(1573840000));
}
}
}
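
For reference, the resolution-to-timestamp mapping exercised in this hunk can be run on its own with chrono 0.4. This is a minimal sketch, assuming an illustrative Resolution enum and helper name; only the DateTime::from_timestamp* calls come from the code above.

use chrono::{DateTime, Utc};

#[derive(Clone, Copy)]
enum Resolution {
    Second,
    Milli,
    Micro,
    Nano,
}

// Interpret a raw epoch integer according to the configured resolution.
fn epoch_to_datetime(t: i64, resolution: Resolution) -> Option<DateTime<Utc>> {
    match resolution {
        Resolution::Second => DateTime::from_timestamp(t, 0),
        Resolution::Milli => DateTime::from_timestamp_millis(t),
        Resolution::Micro => DateTime::from_timestamp_micros(t),
        Resolution::Nano => Some(DateTime::from_timestamp_nanos(t)),
    }
}

fn main() {
    let dt = epoch_to_datetime(1573840000, Resolution::Second).unwrap();
    assert_eq!(dt.timestamp(), 1573840000);
}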


@@ -14,19 +14,17 @@
use regex::Regex;
use snafu::{OptionExt, ResultExt};
use vrl::prelude::Bytes;
use vrl::value::{KeyString, Value as VrlValue};
use crate::error::{
Error, GsubPatternRequiredSnafu, GsubReplacementRequiredSnafu, KeyMustBeStringSnafu,
ProcessorExpectStringSnafu, ProcessorMissingFieldSnafu, RegexSnafu, Result,
ValueMustBeMapSnafu,
};
use crate::etl::field::Fields;
use crate::etl::processor::{
yaml_bool, yaml_new_field, yaml_new_fields, yaml_string, FIELDS_NAME, FIELD_NAME,
IGNORE_MISSING_NAME, PATTERN_NAME,
};
use crate::etl::value::Value;
pub(crate) const PROCESSOR_GSUB: &str = "gsub";
@@ -42,16 +40,16 @@ pub struct GsubProcessor {
}
impl GsubProcessor {
fn process_string(&self, val: &str) -> Result<VrlValue> {
fn process_string(&self, val: &str) -> Result<Value> {
let new_val = self.pattern.replace_all(val, &self.replacement).to_string();
let val = VrlValue::Bytes(Bytes::from(new_val));
let val = Value::String(new_val);
Ok(val)
}
fn process(&self, val: &VrlValue) -> Result<VrlValue> {
fn process(&self, val: &Value) -> Result<Value> {
match val {
VrlValue::Bytes(val) => self.process_string(String::from_utf8_lossy(val).as_ref()),
Value::String(val) => self.process_string(val),
_ => ProcessorExpectStringSnafu {
processor: PROCESSOR_GSUB,
v: val.clone(),
@@ -119,12 +117,11 @@ impl crate::etl::processor::Processor for GsubProcessor {
self.ignore_missing
}
fn exec_mut(&self, mut val: VrlValue) -> Result<VrlValue> {
fn exec_mut(&self, mut val: Value) -> Result<Value> {
for field in self.fields.iter() {
let index = field.input_field();
let val = val.as_object_mut().context(ValueMustBeMapSnafu)?;
match val.get(index) {
Some(VrlValue::Null) | None => {
Some(Value::Null) | None => {
if !self.ignore_missing {
return ProcessorMissingFieldSnafu {
processor: self.kind(),
@@ -136,7 +133,7 @@ impl crate::etl::processor::Processor for GsubProcessor {
Some(v) => {
let result = self.process(v)?;
let output_index = field.target_or_input_field();
val.insert(KeyString::from(output_index.to_string()), result);
val.insert(output_index.to_string(), result)?;
}
}
}
@@ -148,6 +145,7 @@ impl crate::etl::processor::Processor for GsubProcessor {
mod tests {
use super::*;
use crate::etl::processor::gsub::GsubProcessor;
use crate::etl::value::Value;
#[test]
fn test_string_value() {
@@ -158,9 +156,9 @@ mod tests {
ignore_missing: false,
};
let val = VrlValue::Bytes(Bytes::from("123"));
let val = Value::String("123".to_string());
let result = processor.process(&val).unwrap();
assert_eq!(result, VrlValue::Bytes(Bytes::from("xxx")));
assert_eq!(result, Value::String("xxx".to_string()));
}
}
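
The gsub processor above is a thin wrapper over regex replace_all; a standalone sketch of that call follows (the pattern and replacement are illustrative, not taken from the test fixture).

use regex::Regex;

fn main() {
    // replace_all substitutes every match, which is what process_string does
    // with the processor's stored pattern and replacement.
    let pattern = Regex::new(r"\d+").unwrap();
    let replaced = pattern.replace_all("123", "xxx").to_string();
    assert_eq!(replaced, "xxx");
}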


@@ -13,18 +13,17 @@
// limitations under the License.
use snafu::OptionExt;
use vrl::prelude::Bytes;
use vrl::value::{KeyString, Value as VrlValue};
use crate::error::{
Error, JoinSeparatorRequiredSnafu, KeyMustBeStringSnafu, ProcessorExpectStringSnafu,
ProcessorMissingFieldSnafu, Result, ValueMustBeMapSnafu,
ProcessorMissingFieldSnafu, Result,
};
use crate::etl::field::Fields;
use crate::etl::processor::{
yaml_bool, yaml_new_field, yaml_new_fields, yaml_string, Processor, FIELDS_NAME, FIELD_NAME,
IGNORE_MISSING_NAME, SEPARATOR_NAME,
};
use crate::etl::value::{Array, Value};
pub(crate) const PROCESSOR_JOIN: &str = "join";
@@ -37,14 +36,14 @@ pub struct JoinProcessor {
}
impl JoinProcessor {
fn process(&self, arr: &[VrlValue]) -> Result<VrlValue> {
fn process(&self, arr: &Array) -> Result<Value> {
let val = arr
.iter()
.map(|v| v.to_string_lossy())
.collect::<Vec<_>>()
.map(|v| v.to_str_value())
.collect::<Vec<String>>()
.join(&self.separator);
Ok(VrlValue::Bytes(Bytes::from(val)))
Ok(Value::String(val))
}
}
@@ -95,17 +94,16 @@ impl Processor for JoinProcessor {
self.ignore_missing
}
fn exec_mut(&self, mut val: VrlValue) -> Result<VrlValue> {
fn exec_mut(&self, mut val: Value) -> Result<Value> {
for field in self.fields.iter() {
let index = field.input_field();
let val = val.as_object_mut().context(ValueMustBeMapSnafu)?;
match val.get(index) {
Some(VrlValue::Array(arr)) => {
Some(Value::Array(arr)) => {
let result = self.process(arr)?;
let output_index = field.target_or_input_field();
val.insert(KeyString::from(output_index.to_string()), result);
val.insert(output_index.to_string(), result)?;
}
Some(VrlValue::Null) | None => {
Some(Value::Null) | None => {
if !self.ignore_missing {
return ProcessorMissingFieldSnafu {
processor: self.kind(),
@@ -131,10 +129,8 @@ impl Processor for JoinProcessor {
#[cfg(test)]
mod tests {
use vrl::prelude::Bytes;
use vrl::value::Value as VrlValue;
use crate::etl::processor::join::JoinProcessor;
use crate::etl::value::Value;
#[test]
fn test_join_processor() {
@@ -144,10 +140,11 @@ mod tests {
};
let arr = vec![
VrlValue::Bytes(Bytes::from("a")),
VrlValue::Bytes(Bytes::from("b")),
];
Value::String("a".to_string()),
Value::String("b".to_string()),
]
.into();
let result = processor.process(&arr).unwrap();
assert_eq!(result, VrlValue::Bytes(Bytes::from("a-b")));
assert_eq!(result, Value::String("a-b".to_string()));
}
}


@@ -13,17 +13,16 @@
// limitations under the License.
use snafu::{OptionExt as _, ResultExt};
use vrl::value::{KeyString, Value as VrlValue};
use crate::error::{
Error, FieldMustBeTypeSnafu, JsonParseSnafu, KeyMustBeStringSnafu, ProcessorMissingFieldSnafu,
ProcessorUnsupportedValueSnafu, Result, ValueMustBeMapSnafu,
ProcessorUnsupportedValueSnafu, Result,
};
use crate::etl::field::Fields;
use crate::etl::processor::{
yaml_bool, yaml_new_field, yaml_new_fields, FIELDS_NAME, FIELD_NAME, IGNORE_MISSING_NAME,
};
use crate::Processor;
use crate::{json_to_map, Processor, Value};
pub(crate) const PROCESSOR_JSON_PARSE: &str = "json_parse";
@@ -68,21 +67,21 @@ impl TryFrom<&yaml_rust::yaml::Hash> for JsonParseProcessor {
}
impl JsonParseProcessor {
fn process_field(&self, val: &VrlValue) -> Result<VrlValue> {
fn process_field(&self, val: &Value) -> Result<Value> {
let Some(json_str) = val.as_str() else {
return FieldMustBeTypeSnafu {
field: val.to_string(),
field: val.to_str_type(),
ty: "string",
}
.fail();
};
let parsed: VrlValue = serde_json::from_str(&json_str).context(JsonParseSnafu)?;
let parsed: serde_json::Value = serde_json::from_str(json_str).context(JsonParseSnafu)?;
match parsed {
VrlValue::Object(_) => Ok(parsed),
VrlValue::Array(_) => Ok(parsed),
serde_json::Value::Object(_) => Ok(json_to_map(parsed)?),
serde_json::Value::Array(arr) => Ok(Value::Array(arr.try_into()?)),
_ => ProcessorUnsupportedValueSnafu {
processor: self.kind(),
val: val.to_string(),
val: val.to_str_type(),
}
.fail(),
}
@@ -98,15 +97,14 @@ impl Processor for JsonParseProcessor {
self.ignore_missing
}
fn exec_mut(&self, mut val: VrlValue) -> Result<VrlValue> {
fn exec_mut(&self, mut val: Value) -> Result<Value> {
for field in self.fields.iter() {
let index = field.input_field();
let val = val.as_object_mut().context(ValueMustBeMapSnafu)?;
match val.get(index) {
Some(v) => {
let processed = self.process_field(v)?;
let output_index = field.target_or_input_field();
val.insert(KeyString::from(output_index.to_string()), processed);
val.insert(output_index.to_string(), processed)?;
}
None => {
if !self.ignore_missing {
@@ -125,27 +123,24 @@ impl Processor for JsonParseProcessor {
#[cfg(test)]
mod test {
use std::collections::BTreeMap;
use vrl::prelude::Bytes;
use vrl::value::{KeyString, Value as VrlValue};
use crate::etl::processor::json_parse::JsonParseProcessor;
#[test]
fn test_json_parse() {
use super::*;
use crate::Value;
let processor = JsonParseProcessor {
..Default::default()
};
let result = processor
.process_field(&VrlValue::Bytes(Bytes::from(r#"{"hello": "world"}"#)))
.process_field(&Value::String(r#"{"hello": "world"}"#.to_string()))
.unwrap();
let expected = VrlValue::Object(BTreeMap::from([(
KeyString::from("hello"),
VrlValue::Bytes(Bytes::from("world")),
)]));
let expected = Value::Map(crate::Map::one(
"hello".to_string(),
Value::String("world".to_string()),
));
assert_eq!(result, expected);
}
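
Stripped of the pipeline value types, the json_parse field logic above amounts to parsing a string with serde_json and accepting only objects or arrays. A minimal sketch under that reading, with the error type simplified to a String:

use serde_json::Value;

fn parse_json_field(json_str: &str) -> Result<Value, String> {
    let parsed: Value = serde_json::from_str(json_str).map_err(|e| e.to_string())?;
    match parsed {
        // Only maps and arrays are kept; scalars are rejected, as in process_field.
        Value::Object(_) | Value::Array(_) => Ok(parsed),
        other => Err(format!("unsupported value: {other}")),
    }
}

fn main() {
    assert!(parse_json_field(r#"{"hello": "world"}"#).is_ok());
    assert!(parse_json_field("42").is_err());
}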


@@ -14,17 +14,17 @@
use jsonpath_rust::JsonPath;
use snafu::{OptionExt, ResultExt};
use vrl::value::{KeyString, Value as VrlValue};
use crate::error::{
Error, JsonParseSnafu, JsonPathParseResultIndexSnafu, JsonPathParseSnafu, KeyMustBeStringSnafu,
ProcessorMissingFieldSnafu, Result, ValueMustBeMapSnafu,
Error, JsonPathParseResultIndexSnafu, JsonPathParseSnafu, KeyMustBeStringSnafu,
ProcessorMissingFieldSnafu, Result,
};
use crate::etl::field::Fields;
use crate::etl::processor::{
yaml_bool, yaml_new_field, yaml_new_fields, yaml_string, Processor, FIELDS_NAME, FIELD_NAME,
IGNORE_MISSING_NAME, JSON_PATH_NAME, JSON_PATH_RESULT_INDEX_NAME,
};
use crate::Value;
pub(crate) const PROCESSOR_JSON_PATH: &str = "json_path";
@@ -84,7 +84,7 @@ impl TryFrom<&yaml_rust::yaml::Hash> for JsonPathProcessor {
#[derive(Debug)]
pub struct JsonPathProcessor {
fields: Fields,
json_path: JsonPath<serde_json::Value>,
json_path: JsonPath<Value>,
ignore_missing: bool,
result_index: Option<usize>,
}
@@ -101,22 +101,17 @@ impl Default for JsonPathProcessor {
}
impl JsonPathProcessor {
fn process_field(&self, val: &VrlValue) -> Result<VrlValue> {
let v = serde_json::to_value(val).context(JsonParseSnafu)?;
let p = self.json_path.find(&v);
match p {
serde_json::Value::Array(arr) => {
fn process_field(&self, val: &Value) -> Result<Value> {
let processed = self.json_path.find(val);
match processed {
Value::Array(arr) => {
if let Some(index) = self.result_index {
Ok(arr
.get(index)
.cloned()
.map(|v| v.into())
.unwrap_or(VrlValue::Null))
Ok(arr.get(index).cloned().unwrap_or(Value::Null))
} else {
Ok(VrlValue::Array(arr.into_iter().map(|v| v.into()).collect()))
Ok(Value::Array(arr))
}
}
v => Ok(v.into()),
v => Ok(v),
}
}
}
@@ -130,15 +125,14 @@ impl Processor for JsonPathProcessor {
self.ignore_missing
}
fn exec_mut(&self, mut val: VrlValue) -> Result<VrlValue> {
fn exec_mut(&self, mut val: Value) -> Result<Value> {
for field in self.fields.iter() {
let index = field.input_field();
let val = val.as_object_mut().context(ValueMustBeMapSnafu)?;
match val.get(index) {
Some(v) => {
let processed = self.process_field(v)?;
let output_index = field.target_or_input_field();
val.insert(KeyString::from(output_index), processed);
val.insert(output_index.to_string(), processed)?;
}
None => {
if !self.ignore_missing {
@@ -157,13 +151,12 @@ impl Processor for JsonPathProcessor {
#[cfg(test)]
mod test {
use std::collections::BTreeMap;
use vrl::prelude::Bytes;
use crate::Map;
#[test]
fn test_json_path() {
use super::*;
use crate::Value;
let json_path = JsonPath::try_from("$.hello").unwrap();
let processor = JsonPathProcessor {
@@ -173,11 +166,11 @@ mod test {
};
let result = processor
.process_field(&VrlValue::Object(BTreeMap::from([(
KeyString::from("hello"),
VrlValue::Bytes(Bytes::from("world")),
)])))
.process_field(&Value::Map(Map::one(
"hello",
Value::String("world".to_string()),
)))
.unwrap();
assert_eq!(result, VrlValue::Bytes(Bytes::from("world")));
assert_eq!(result, Value::String("world".to_string()));
}
}


@@ -13,18 +13,17 @@
// limitations under the License.
use snafu::OptionExt;
use vrl::prelude::Bytes;
use vrl::value::{KeyString, Value as VrlValue};
use crate::error::{
Error, KeyMustBeStringSnafu, LetterInvalidMethodSnafu, ProcessorExpectStringSnafu,
ProcessorMissingFieldSnafu, Result, ValueMustBeMapSnafu,
ProcessorMissingFieldSnafu, Result,
};
use crate::etl::field::Fields;
use crate::etl::processor::{
yaml_bool, yaml_new_field, yaml_new_fields, yaml_string, Processor, FIELDS_NAME, FIELD_NAME,
IGNORE_MISSING_NAME, METHOD_NAME,
};
use crate::etl::value::Value;
pub(crate) const PROCESSOR_LETTER: &str = "letter";
@@ -68,14 +67,15 @@ pub struct LetterProcessor {
}
impl LetterProcessor {
fn process_field(&self, val: &Bytes) -> VrlValue {
match self.method {
Method::Upper => VrlValue::Bytes(Bytes::from(val.to_ascii_uppercase())),
Method::Lower => VrlValue::Bytes(Bytes::from(val.to_ascii_lowercase())),
Method::Capital => VrlValue::Bytes(Bytes::from(capitalize(
String::from_utf8_lossy(val).as_ref(),
))),
}
fn process_field(&self, val: &str) -> Result<Value> {
let processed = match self.method {
Method::Upper => val.to_uppercase(),
Method::Lower => val.to_lowercase(),
Method::Capital => capitalize(val),
};
let val = Value::String(processed);
Ok(val)
}
}
@@ -125,17 +125,16 @@ impl Processor for LetterProcessor {
self.ignore_missing
}
fn exec_mut(&self, mut val: VrlValue) -> Result<VrlValue> {
fn exec_mut(&self, mut val: Value) -> Result<Value> {
for field in self.fields.iter() {
let index = field.input_field();
let val = val.as_object_mut().context(ValueMustBeMapSnafu)?;
match val.get(index) {
Some(VrlValue::Bytes(s)) => {
let result = self.process_field(s);
Some(Value::String(s)) => {
let result = self.process_field(s)?;
let output_key = field.target_or_input_field();
val.insert(KeyString::from(output_key), result);
val.insert(output_key.to_string(), result)?;
}
Some(VrlValue::Null) | None => {
Some(Value::Null) | None => {
if !self.ignore_missing {
return ProcessorMissingFieldSnafu {
processor: self.kind(),
@@ -168,10 +167,8 @@ fn capitalize(s: &str) -> String {
#[cfg(test)]
mod tests {
use vrl::prelude::Bytes;
use vrl::value::Value as VrlValue;
use crate::etl::processor::letter::{LetterProcessor, Method};
use crate::etl::value::Value;
#[test]
fn test_process() {
@@ -180,8 +177,8 @@ mod tests {
method: Method::Upper,
..Default::default()
};
let processed = processor.process_field(&Bytes::from("pipeline"));
assert_eq!(VrlValue::Bytes(Bytes::from("PIPELINE")), processed)
let processed = processor.process_field("pipeline").unwrap();
assert_eq!(Value::String("PIPELINE".into()), processed)
}
{
@@ -189,8 +186,8 @@ mod tests {
method: Method::Lower,
..Default::default()
};
let processed = processor.process_field(&Bytes::from("Pipeline"));
assert_eq!(VrlValue::Bytes(Bytes::from("pipeline")), processed)
let processed = processor.process_field("Pipeline").unwrap();
assert_eq!(Value::String("pipeline".into()), processed)
}
{
@@ -198,8 +195,8 @@ mod tests {
method: Method::Capital,
..Default::default()
};
let processed = processor.process_field(&Bytes::from("pipeline"));
assert_eq!(VrlValue::Bytes(Bytes::from("Pipeline")), processed)
let processed = processor.process_field("pipeline").unwrap();
assert_eq!(Value::String("Pipeline".into()), processed)
}
}
}
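
The Capital branch relies on a capitalize helper whose body is not part of this hunk. One common way to write it, shown as a hedged sketch that matches the "pipeline" -> "Pipeline" expectation in the tests:

fn capitalize(s: &str) -> String {
    let mut chars = s.chars();
    match chars.next() {
        None => String::new(),
        // Uppercase only the first character and keep the rest untouched.
        Some(first) => first.to_uppercase().collect::<String>() + chars.as_str(),
    }
}

fn main() {
    assert_eq!(capitalize("pipeline"), "Pipeline");
    assert_eq!(capitalize(""), "");
}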


@@ -23,19 +23,18 @@ use std::collections::BTreeMap;
use lazy_static::lazy_static;
use regex::Regex;
use snafu::{OptionExt, ResultExt};
use vrl::prelude::Bytes;
use vrl::value::{KeyString, Value as VrlValue};
use crate::error::{
Error, KeyMustBeStringSnafu, ProcessorExpectStringSnafu, ProcessorMissingFieldSnafu,
RegexNamedGroupNotFoundSnafu, RegexNoValidFieldSnafu, RegexNoValidPatternSnafu, RegexSnafu,
Result, ValueMustBeMapSnafu,
Result,
};
use crate::etl::field::Fields;
use crate::etl::processor::{
yaml_bool, yaml_new_field, yaml_new_fields, yaml_string, yaml_strings, Processor, FIELDS_NAME,
FIELD_NAME, IGNORE_MISSING_NAME, PATTERN_NAME,
};
use crate::etl::value::Value;
lazy_static! {
static ref GROUPS_NAME_REGEX: Regex = Regex::new(r"\(\?P?<([[:word:]]+)>.+?\)").unwrap();
@@ -169,17 +168,14 @@ impl RegexProcessor {
Ok(())
}
fn process(&self, prefix: &str, val: &str) -> Result<BTreeMap<KeyString, VrlValue>> {
fn process(&self, prefix: &str, val: &str) -> Result<BTreeMap<String, Value>> {
let mut result = BTreeMap::new();
for gr in self.patterns.iter() {
if let Some(captures) = gr.regex.captures(val) {
for group in gr.groups.iter() {
if let Some(capture) = captures.name(group) {
let value = capture.as_str().to_string();
result.insert(
KeyString::from(generate_key(prefix, group)),
VrlValue::Bytes(Bytes::from(value)),
);
result.insert(generate_key(prefix, group), Value::String(value));
}
}
}
@@ -197,17 +193,16 @@ impl Processor for RegexProcessor {
self.ignore_missing
}
fn exec_mut(&self, mut val: VrlValue) -> Result<VrlValue> {
fn exec_mut(&self, mut val: Value) -> Result<Value> {
for field in self.fields.iter() {
let index = field.input_field();
let prefix = field.target_or_input_field();
let val = val.as_object_mut().context(ValueMustBeMapSnafu)?;
match val.get(index) {
Some(VrlValue::Bytes(s)) => {
let result = self.process(prefix, String::from_utf8_lossy(s).as_ref())?;
val.extend(result);
Some(Value::String(s)) => {
let result = self.process(prefix, s)?;
val.extend(result.into())?;
}
Some(VrlValue::Null) | None => {
Some(Value::Null) | None => {
if !self.ignore_missing {
return ProcessorMissingFieldSnafu {
processor: self.kind(),
@@ -231,11 +226,12 @@ impl Processor for RegexProcessor {
}
#[cfg(test)]
mod tests {
use ahash::{HashMap, HashMapExt};
use itertools::Itertools;
use vrl::value::Value as VrlValue;
use super::*;
use crate::etl::processor::regex::RegexProcessor;
use crate::etl::value::{Map, Value};
#[test]
fn test_simple_parse() {
@@ -254,11 +250,15 @@ ignore_missing: false"#;
let result = processor.process("a", "123").unwrap();
let v = vec![(KeyString::from("a_ar"), VrlValue::Bytes(Bytes::from("1")))]
.into_iter()
.collect::<BTreeMap<KeyString, VrlValue>>();
let map = Map { values: result };
assert_eq!(v, result);
let v = Map {
values: vec![("a_ar".to_string(), Value::String("1".to_string()))]
.into_iter()
.collect(),
};
assert_eq!(v, map);
}
#[test]
@@ -270,30 +270,15 @@ ignore_missing: false"#;
let cw = "[c=w,n=US_CA_SANJOSE,o=55155]";
let breadcrumbs_str = [cc, cg, co, cp, cw].iter().join(",");
let temporary_map: BTreeMap<KeyString, VrlValue> = [
(
"breadcrumbs_parent",
VrlValue::Bytes(Bytes::from(cc.to_string())),
),
(
"breadcrumbs_edge",
VrlValue::Bytes(Bytes::from(cg.to_string())),
),
(
"breadcrumbs_origin",
VrlValue::Bytes(Bytes::from(co.to_string())),
),
(
"breadcrumbs_peer",
VrlValue::Bytes(Bytes::from(cp.to_string())),
),
(
"breadcrumbs_wrapper",
VrlValue::Bytes(Bytes::from(cw.to_string())),
),
let temporary_map: BTreeMap<String, Value> = [
("breadcrumbs_parent", Value::String(cc.to_string())),
("breadcrumbs_edge", Value::String(cg.to_string())),
("breadcrumbs_origin", Value::String(co.to_string())),
("breadcrumbs_peer", Value::String(cp.to_string())),
("breadcrumbs_wrapper", Value::String(cw.to_string())),
]
.into_iter()
.map(|(k, v)| (KeyString::from(k), v))
.map(|(k, v)| (k.to_string(), v))
.collect();
{
@@ -346,66 +331,35 @@ ignore_missing: false"#;
let processor_yaml_hash = processor_yaml.as_hash().unwrap();
let processor = RegexProcessor::try_from(processor_yaml_hash).unwrap();
let mut result = BTreeMap::new();
let mut result = HashMap::new();
for field in processor.fields.iter() {
let s = temporary_map.get(field.input_field()).unwrap();
let s = s.to_string_lossy();
let s = temporary_map
.get(field.input_field())
.unwrap()
.to_str_value();
let prefix = field.target_or_input_field();
let r = processor.process(prefix, s.as_ref()).unwrap();
let r = processor.process(prefix, &s).unwrap();
result.extend(r);
}
let new_values = vec![
(
"edge_ip",
VrlValue::Bytes(Bytes::from("12.34.567.89".to_string())),
),
(
"edge_request_id",
VrlValue::Bytes(Bytes::from("12345678".to_string())),
),
(
"edge_geo",
VrlValue::Bytes(Bytes::from("US_CA_SANJOSE".to_string())),
),
(
"edge_asn",
VrlValue::Bytes(Bytes::from("20940".to_string())),
),
(
"origin_ip",
VrlValue::Bytes(Bytes::from("987.654.321.09".to_string())),
),
(
"peer_asn",
VrlValue::Bytes(Bytes::from("55155".to_string())),
),
(
"peer_geo",
VrlValue::Bytes(Bytes::from("US_CA_SANJOSE".to_string())),
),
(
"parent_asn",
VrlValue::Bytes(Bytes::from("55155".to_string())),
),
(
"parent_geo",
VrlValue::Bytes(Bytes::from("US_CA_SANJOSE".to_string())),
),
(
"wrapper_asn",
VrlValue::Bytes(Bytes::from("55155".to_string())),
),
(
"wrapper_geo",
VrlValue::Bytes(Bytes::from("US_CA_SANJOSE".to_string())),
),
("edge_ip", Value::String("12.34.567.89".to_string())),
("edge_request_id", Value::String("12345678".to_string())),
("edge_geo", Value::String("US_CA_SANJOSE".to_string())),
("edge_asn", Value::String("20940".to_string())),
("origin_ip", Value::String("987.654.321.09".to_string())),
("peer_asn", Value::String("55155".to_string())),
("peer_geo", Value::String("US_CA_SANJOSE".to_string())),
("parent_asn", Value::String("55155".to_string())),
("parent_geo", Value::String("US_CA_SANJOSE".to_string())),
("wrapper_asn", Value::String("55155".to_string())),
("wrapper_geo", Value::String("US_CA_SANJOSE".to_string())),
]
.into_iter()
.map(|(k, v)| (KeyString::from(k), v))
.collect::<BTreeMap<KeyString, VrlValue>>();
.map(|(k, v)| (k.to_string(), v))
.collect();
assert_eq!(result, new_values);
}
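
The regex processor walks named capture groups and prefixes each group name to build output keys. A self-contained sketch of that pattern with the regex crate; the pattern, prefix, and input here are illustrative:

use std::collections::BTreeMap;

use regex::Regex;

fn main() {
    let pattern = Regex::new(r"(?P<year>\d{4})-(?P<month>\d{2})").unwrap();
    // Group names can be discovered from the pattern itself, similar in spirit
    // to GROUPS_NAME_REGEX above.
    let groups: Vec<&str> = pattern.capture_names().flatten().collect();

    let mut result: BTreeMap<String, String> = BTreeMap::new();
    if let Some(captures) = pattern.captures("2025-07-10") {
        for &group in &groups {
            if let Some(capture) = captures.name(group) {
                // prefix + "_" + group name, as generate_key does for output keys.
                result.insert(format!("ts_{group}"), capture.as_str().to_string());
            }
        }
    }
    assert_eq!(result.get("ts_year").map(String::as_str), Some("2025"));
}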


@@ -14,7 +14,6 @@
use ahash::{HashSet, HashSetExt};
use snafu::OptionExt;
use vrl::value::{KeyString, Value as VrlValue};
use crate::error::{
Error, KeyMustBeStringSnafu, ProcessorUnsupportedValueSnafu, Result, ValueMustBeMapSnafu,
@@ -23,7 +22,7 @@ use crate::etl::field::Fields;
use crate::etl::processor::{
yaml_new_field, yaml_new_fields, yaml_string, FIELDS_NAME, FIELD_NAME, TYPE_NAME,
};
use crate::Processor;
use crate::{Processor, Value};
pub(crate) const PROCESSOR_SELECT: &str = "select";
const INCLUDE_KEY: &str = "include";
@@ -99,8 +98,8 @@ impl Processor for SelectProcessor {
true
}
fn exec_mut(&self, mut val: VrlValue) -> Result<VrlValue> {
let v_map = val.as_object_mut().context(ValueMustBeMapSnafu)?;
fn exec_mut(&self, mut val: Value) -> Result<Value> {
let v_map = val.as_map_mut().context(ValueMustBeMapSnafu)?;
match self.select_type {
SelectType::Include => {
@@ -110,7 +109,7 @@ impl Processor for SelectProcessor {
let field_name = field.input_field();
if let Some(target_name) = field.target_field() {
if let Some(v) = v_map.remove(field_name) {
v_map.insert(KeyString::from(target_name), v);
v_map.insert(target_name.to_string(), v);
}
include_key_set.insert(target_name);
} else {
@@ -134,12 +133,9 @@ impl Processor for SelectProcessor {
mod test {
use std::collections::BTreeMap;
use vrl::prelude::Bytes;
use vrl::value::{KeyString, Value as VrlValue};
use crate::etl::field::{Field, Fields};
use crate::etl::processor::select::{SelectProcessor, SelectType};
use crate::Processor;
use crate::{Map, Processor, Value};
#[test]
fn test_select() {
@@ -149,24 +145,15 @@ mod test {
};
let mut p = BTreeMap::new();
p.insert(
KeyString::from("hello"),
VrlValue::Bytes(Bytes::from("world".to_string())),
);
p.insert(
KeyString::from("hello2"),
VrlValue::Bytes(Bytes::from("world2".to_string())),
);
p.insert("hello".to_string(), Value::String("world".to_string()));
p.insert("hello2".to_string(), Value::String("world2".to_string()));
let result = processor.exec_mut(VrlValue::Object(p));
let result = processor.exec_mut(Value::Map(Map { values: p }));
assert!(result.is_ok());
let mut result = result.unwrap();
let p = result.as_object_mut().unwrap();
let p = result.as_map_mut().unwrap();
assert_eq!(p.len(), 1);
assert_eq!(
p.get(&KeyString::from("hello")),
Some(&VrlValue::Bytes(Bytes::from("world".to_string())))
);
assert_eq!(p.get("hello"), Some(&Value::String("world".to_string())));
}
#[test]
@@ -177,24 +164,15 @@ mod test {
};
let mut p = BTreeMap::new();
p.insert(
KeyString::from("hello"),
VrlValue::Bytes(Bytes::from("world".to_string())),
);
p.insert(
KeyString::from("hello2"),
VrlValue::Bytes(Bytes::from("world2".to_string())),
);
p.insert("hello".to_string(), Value::String("world".to_string()));
p.insert("hello2".to_string(), Value::String("world2".to_string()));
let result = processor.exec_mut(VrlValue::Object(p));
let result = processor.exec_mut(Value::Map(Map { values: p }));
assert!(result.is_ok());
let mut result = result.unwrap();
let p = result.as_object_mut().unwrap();
let p = result.as_map_mut().unwrap();
assert_eq!(p.len(), 1);
assert_eq!(
p.get(&KeyString::from("hello3")),
Some(&VrlValue::Bytes(Bytes::from("world".to_string())))
);
assert_eq!(p.get("hello3"), Some(&Value::String("world".to_string())));
}
#[test]
@@ -205,24 +183,15 @@ mod test {
};
let mut p = BTreeMap::new();
p.insert(
KeyString::from("hello"),
VrlValue::Bytes(Bytes::from("world".to_string())),
);
p.insert(
KeyString::from("hello2"),
VrlValue::Bytes(Bytes::from("world2".to_string())),
);
p.insert("hello".to_string(), Value::String("world".to_string()));
p.insert("hello2".to_string(), Value::String("world2".to_string()));
let result = processor.exec_mut(VrlValue::Object(p));
let result = processor.exec_mut(Value::Map(Map { values: p }));
assert!(result.is_ok());
let mut result = result.unwrap();
let p = result.as_object_mut().unwrap();
let p = result.as_map_mut().unwrap();
assert_eq!(p.len(), 1);
assert_eq!(p.get(&KeyString::from("hello")), None);
assert_eq!(
p.get(&KeyString::from("hello2")),
Some(&VrlValue::Bytes(Bytes::from("world2".to_string())))
);
assert_eq!(p.get("hello"), None);
assert_eq!(p.get("hello2"), Some(&Value::String("world2".to_string())));
}
}
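
At map level, the include variant of the select processor reduces to keeping a whitelisted set of keys, and exclude is the inverse. A tiny sketch of the include case over a plain BTreeMap, using the key names from the tests above:

use std::collections::BTreeMap;

fn main() {
    let mut map = BTreeMap::from([
        ("hello".to_string(), "world".to_string()),
        ("hello2".to_string(), "world2".to_string()),
    ]);
    let include = ["hello"];
    // Keep only the included keys; SelectType::Exclude would negate the test.
    map.retain(|k, _| include.contains(&k.as_str()));
    assert_eq!(map.len(), 1);
    assert_eq!(map.get("hello").map(String::as_str), Some("world"));
}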


@@ -13,17 +13,14 @@
// limitations under the License.
use snafu::OptionExt as _;
use vrl::value::{KeyString, Value as VrlValue};
use crate::error::{
Error, KeyMustBeStringSnafu, ProcessorMissingFieldSnafu, Result, ValueMustBeMapSnafu,
};
use crate::error::{Error, KeyMustBeStringSnafu, ProcessorMissingFieldSnafu, Result};
use crate::etl::field::Fields;
use crate::etl::processor::{
yaml_bool, yaml_new_field, yaml_new_fields, yaml_string, FIELDS_NAME, FIELD_NAME,
IGNORE_MISSING_NAME, KEY_NAME,
};
use crate::Processor;
use crate::{Processor, Value};
pub(crate) const PROCESSOR_SIMPLE_EXTRACT: &str = "simple_extract";
@@ -77,14 +74,14 @@ impl TryFrom<&yaml_rust::yaml::Hash> for SimpleExtractProcessor {
}
impl SimpleExtractProcessor {
fn process_field(&self, val: &VrlValue) -> Result<VrlValue> {
fn process_field(&self, val: &Value) -> Result<Value> {
let mut current = val;
for key in self.key.iter() {
let VrlValue::Object(map) = current else {
return Ok(VrlValue::Null);
let Value::Map(map) = current else {
return Ok(Value::Null);
};
let Some(v) = map.get(key.as_str()) else {
return Ok(VrlValue::Null);
let Some(v) = map.get(key) else {
return Ok(Value::Null);
};
current = v;
}
@@ -101,15 +98,14 @@ impl Processor for SimpleExtractProcessor {
self.ignore_missing
}
fn exec_mut(&self, mut val: VrlValue) -> Result<VrlValue> {
fn exec_mut(&self, mut val: Value) -> Result<Value> {
for field in self.fields.iter() {
let index = field.input_field();
let val = val.as_object_mut().context(ValueMustBeMapSnafu)?;
match val.get(index) {
Some(v) => {
let processed = self.process_field(v)?;
let output_index = field.target_or_input_field();
val.insert(KeyString::from(output_index), processed);
val.insert(output_index.to_string(), processed)?;
}
None => {
if !self.ignore_missing {
@@ -128,13 +124,11 @@ impl Processor for SimpleExtractProcessor {
#[cfg(test)]
mod test {
use std::collections::BTreeMap;
use vrl::prelude::Bytes;
#[test]
fn test_simple_extract() {
use super::*;
use crate::{Map, Value};
let processor = SimpleExtractProcessor {
key: vec!["hello".to_string()],
@@ -142,12 +136,12 @@ mod test {
};
let result = processor
.process_field(&VrlValue::Object(BTreeMap::from([(
KeyString::from("hello"),
VrlValue::Bytes(Bytes::from("world".to_string())),
)])))
.process_field(&Value::Map(Map::one(
"hello",
Value::String("world".to_string()),
)))
.unwrap();
assert_eq!(result, VrlValue::Bytes(Bytes::from("world".to_string())));
assert_eq!(result, Value::String("world".to_string()));
}
}
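
The simple_extract walk above descends through nested maps one key at a time and falls back to a null value when a step is missing. A standalone sketch with serde_json, using Option in place of the processor's Value::Null:

use serde_json::{json, Value};

fn extract<'a>(doc: &'a Value, keys: &[&str]) -> Option<&'a Value> {
    let mut current = doc;
    for key in keys {
        // Stop as soon as the current level is not a map or the key is absent.
        current = current.as_object()?.get(*key)?;
    }
    Some(current)
}

fn main() {
    let doc = json!({"hello": {"inner": "world"}});
    assert_eq!(extract(&doc, &["hello", "inner"]), Some(&json!("world")));
    assert_eq!(extract(&doc, &["missing"]), None);
}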


@@ -12,20 +12,19 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use snafu::OptionExt;
use urlencoding::{decode_binary, encode_binary};
use vrl::prelude::Bytes;
use vrl::value::{KeyString, Value as VrlValue};
use snafu::{OptionExt, ResultExt};
use urlencoding::{decode, encode};
use crate::error::{
Error, KeyMustBeStringSnafu, ProcessorExpectStringSnafu, ProcessorMissingFieldSnafu, Result,
UrlEncodingInvalidMethodSnafu, ValueMustBeMapSnafu,
UrlEncodingDecodeSnafu, UrlEncodingInvalidMethodSnafu,
};
use crate::etl::field::Fields;
use crate::etl::processor::{
yaml_bool, yaml_new_field, yaml_new_fields, yaml_string, FIELDS_NAME, FIELD_NAME,
IGNORE_MISSING_NAME, METHOD_NAME,
};
use crate::etl::value::Value;
pub(crate) const PROCESSOR_URL_ENCODING: &str = "urlencoding";
@@ -66,12 +65,12 @@ pub struct UrlEncodingProcessor {
}
impl UrlEncodingProcessor {
fn process_field(&self, val: &Bytes) -> Result<VrlValue> {
fn process_field(&self, val: &str) -> Result<Value> {
let processed = match self.method {
Method::Encode => Bytes::from_iter(encode_binary(val).bytes()),
Method::Decode => Bytes::from(decode_binary(val).to_vec()),
Method::Encode => encode(val).to_string(),
Method::Decode => decode(val).context(UrlEncodingDecodeSnafu)?.into_owned(),
};
Ok(VrlValue::Bytes(processed))
Ok(Value::String(processed))
}
}
@@ -126,17 +125,16 @@ impl crate::etl::processor::Processor for UrlEncodingProcessor {
self.ignore_missing
}
fn exec_mut(&self, mut val: VrlValue) -> Result<VrlValue> {
fn exec_mut(&self, mut val: Value) -> Result<Value> {
for field in self.fields.iter() {
let index = field.input_field();
let val = val.as_object_mut().context(ValueMustBeMapSnafu)?;
match val.get(index) {
Some(VrlValue::Bytes(s)) => {
Some(Value::String(s)) => {
let result = self.process_field(s)?;
let output_index = field.target_or_input_field();
val.insert(KeyString::from(output_index), result);
val.insert(output_index.to_string(), result)?;
}
Some(VrlValue::Null) | None => {
Some(Value::Null) | None => {
if !self.ignore_missing {
return ProcessorMissingFieldSnafu {
processor: self.kind(),
@@ -161,11 +159,9 @@ impl crate::etl::processor::Processor for UrlEncodingProcessor {
#[cfg(test)]
mod tests {
use vrl::prelude::Bytes;
use vrl::value::Value as VrlValue;
use crate::etl::field::Fields;
use crate::etl::processor::urlencoding::UrlEncodingProcessor;
use crate::etl::value::Value;
#[test]
fn test_decode_url() {
@@ -174,8 +170,8 @@ mod tests {
{
let processor = UrlEncodingProcessor::default();
let result = processor.process_field(&Bytes::from(encoded)).unwrap();
assert_eq!(VrlValue::Bytes(Bytes::from(decoded)), result)
let result = processor.process_field(encoded).unwrap();
assert_eq!(Value::String(decoded.into()), result)
}
{
let processor = UrlEncodingProcessor {
@@ -183,8 +179,8 @@ mod tests {
method: super::Method::Encode,
ignore_missing: false,
};
let result = processor.process_field(&Bytes::from(decoded)).unwrap();
assert_eq!(VrlValue::Bytes(Bytes::from(encoded)), result)
let result = processor.process_field(decoded).unwrap();
assert_eq!(Value::String(encoded.into()), result)
}
}
}
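
Both branches of the processor come straight from the urlencoding crate; the encode/decode round-trip can be checked in isolation (the sample path is made up):

use urlencoding::{decode, encode};

fn main() {
    let decoded = "/dashboards/path with spaces";
    let encoded = encode(decoded).to_string();
    assert_eq!(encoded, "%2Fdashboards%2Fpath%20with%20spaces");
    assert_eq!(decode(&encoded).unwrap(), decoded);
}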


@@ -15,18 +15,19 @@
use std::collections::BTreeMap;
use chrono_tz::Tz;
use snafu::OptionExt;
use snafu::{OptionExt, ResultExt};
use vrl::compiler::runtime::Runtime;
use vrl::compiler::{compile, Program, TargetValue};
use vrl::diagnostic::Formatter;
use vrl::prelude::TimeZone;
use vrl::value::{Kind, Secrets, Value as VrlValue};
use vrl::prelude::{Bytes, NotNan, TimeZone};
use vrl::value::{KeyString, Kind, Secrets, Value as VrlValue};
use crate::error::{
CompileVrlSnafu, Error, ExecuteVrlSnafu, KeyMustBeStringSnafu, Result, VrlRegexValueSnafu,
VrlReturnValueSnafu,
BytesToUtf8Snafu, CompileVrlSnafu, Error, ExecuteVrlSnafu, FloatNaNSnafu,
InvalidTimestampSnafu, KeyMustBeStringSnafu, Result, VrlRegexValueSnafu, VrlReturnValueSnafu,
};
use crate::etl::processor::yaml_string;
use crate::Value as PipelineValue;
pub(crate) const PROCESSOR_VRL: &str = "vrl";
const SOURCE: &str = "source";
@@ -61,9 +62,11 @@ impl VrlProcessor {
Ok(Self { source, program })
}
pub fn resolve(&self, value: VrlValue) -> Result<VrlValue> {
pub fn resolve(&self, m: PipelineValue) -> Result<PipelineValue> {
let pipeline_vrl = pipeline_value_to_vrl_value(m)?;
let mut target = TargetValue {
value,
value: pipeline_vrl,
metadata: VrlValue::Object(BTreeMap::new()),
secrets: Secrets::default(),
};
@@ -79,7 +82,7 @@ impl VrlProcessor {
.build()
})?;
Ok(re)
vrl_value_to_pipeline_value(re)
}
}
@@ -110,17 +113,91 @@ impl crate::etl::processor::Processor for VrlProcessor {
true
}
fn exec_mut(&self, val: VrlValue) -> Result<VrlValue> {
fn exec_mut(&self, val: PipelineValue) -> Result<PipelineValue> {
let val = self.resolve(val)?;
if let VrlValue::Object(_) = val {
Ok(val)
if let PipelineValue::Map(m) = val {
Ok(PipelineValue::Map(m.values.into()))
} else {
VrlRegexValueSnafu.fail()
}
}
}
fn pipeline_value_to_vrl_value(v: PipelineValue) -> Result<VrlValue> {
match v {
PipelineValue::Null => Ok(VrlValue::Null),
PipelineValue::Int8(x) => Ok(VrlValue::Integer(x as i64)),
PipelineValue::Int16(x) => Ok(VrlValue::Integer(x as i64)),
PipelineValue::Int32(x) => Ok(VrlValue::Integer(x as i64)),
PipelineValue::Int64(x) => Ok(VrlValue::Integer(x)),
PipelineValue::Uint8(x) => Ok(VrlValue::Integer(x as i64)),
PipelineValue::Uint16(x) => Ok(VrlValue::Integer(x as i64)),
PipelineValue::Uint32(x) => Ok(VrlValue::Integer(x as i64)),
PipelineValue::Uint64(x) => Ok(VrlValue::Integer(x as i64)),
PipelineValue::Float32(x) => NotNan::new(x as f64)
.map_err(|_| FloatNaNSnafu { input_float: x }.build())
.map(VrlValue::Float),
PipelineValue::Float64(x) => NotNan::new(x)
.map_err(|_| FloatNaNSnafu { input_float: x }.build())
.map(VrlValue::Float),
PipelineValue::Boolean(x) => Ok(VrlValue::Boolean(x)),
PipelineValue::String(x) => Ok(VrlValue::Bytes(Bytes::copy_from_slice(x.as_bytes()))),
PipelineValue::Timestamp(x) => x
.to_datetime()
.context(InvalidTimestampSnafu {
input: x.to_string(),
})
.map(VrlValue::Timestamp),
PipelineValue::Array(array) => Ok(VrlValue::Array(
array
.into_iter()
.map(pipeline_value_to_vrl_value)
.collect::<Result<Vec<_>>>()?,
)),
PipelineValue::Map(m) => {
let values = m
.values
.into_iter()
.map(|(k, v)| pipeline_value_to_vrl_value(v).map(|v| (KeyString::from(k), v)))
.collect::<Result<BTreeMap<_, _>>>()?;
Ok(VrlValue::Object(values))
}
}
}
fn vrl_value_to_pipeline_value(v: VrlValue) -> Result<PipelineValue> {
match v {
VrlValue::Bytes(bytes) => String::from_utf8(bytes.to_vec())
.context(BytesToUtf8Snafu)
.map(PipelineValue::String),
VrlValue::Regex(_) => VrlRegexValueSnafu.fail(),
VrlValue::Integer(x) => Ok(PipelineValue::Int64(x)),
VrlValue::Float(not_nan) => Ok(PipelineValue::Float64(not_nan.into_inner())),
VrlValue::Boolean(b) => Ok(PipelineValue::Boolean(b)),
VrlValue::Timestamp(date_time) => crate::etl::value::Timestamp::from_datetime(date_time)
.context(InvalidTimestampSnafu {
input: date_time.to_string(),
})
.map(PipelineValue::Timestamp),
VrlValue::Object(bm) => {
let b = bm
.into_iter()
.map(|(k, v)| vrl_value_to_pipeline_value(v).map(|v| (k.to_string(), v)))
.collect::<Result<BTreeMap<String, PipelineValue>>>()?;
Ok(PipelineValue::Map(b.into()))
}
VrlValue::Array(values) => {
let a = values
.into_iter()
.map(vrl_value_to_pipeline_value)
.collect::<Result<Vec<_>>>()?;
Ok(PipelineValue::Array(a.into()))
}
VrlValue::Null => Ok(PipelineValue::Null),
}
}
fn check_regex_output(output_kind: &Kind) -> Result<()> {
if output_kind.is_regex() {
return VrlRegexValueSnafu.fail();
@@ -146,10 +223,9 @@ fn check_regex_output(output_kind: &Kind) -> Result<()> {
#[cfg(test)]
mod tests {
use vrl::prelude::Bytes;
use vrl::value::KeyString;
use super::*;
use crate::etl::value::Timestamp;
use crate::Map;
#[test]
fn test_vrl() {
@@ -167,27 +243,31 @@ del(.user_info)
let mut n = BTreeMap::new();
n.insert(
KeyString::from("name"),
VrlValue::Bytes(Bytes::from("certain_name")),
"name".to_string(),
PipelineValue::String("certain_name".to_string()),
);
let mut m = BTreeMap::new();
m.insert(KeyString::from("user_info"), VrlValue::Object(n));
m.insert(
"user_info".to_string(),
PipelineValue::Map(Map { values: n }),
);
let re = v.resolve(VrlValue::Object(m));
let re = v.resolve(PipelineValue::Map(Map { values: m }));
assert!(re.is_ok());
let re = re.unwrap();
assert!(matches!(re, VrlValue::Object(_)));
let re = re.as_object().unwrap();
assert!(matches!(re, PipelineValue::Map(_)));
assert!(re.get("name").is_some());
let name = re.get("name").unwrap();
let name = name.as_object().unwrap();
assert!(matches!(name.get("a").unwrap(), VrlValue::Bytes(x) if x == "certain_name"));
assert!(matches!(name.get("b").unwrap(), VrlValue::Bytes(x) if x == "certain_name"));
assert!(matches!(name.get("a").unwrap(), PipelineValue::String(x) if x == "certain_name"));
assert!(matches!(name.get("b").unwrap(), PipelineValue::String(x) if x == "certain_name"));
assert!(re.get("timestamp").is_some());
let timestamp = re.get("timestamp").unwrap();
assert!(matches!(timestamp, VrlValue::Timestamp(_)));
assert!(matches!(
timestamp,
PipelineValue::Timestamp(Timestamp::Nanosecond(_))
));
}
#[test]


@@ -15,20 +15,16 @@
pub mod index;
pub mod transformer;
use api::v1::value::ValueData;
use api::v1::ColumnDataType;
use chrono::Utc;
use snafu::{ensure, OptionExt};
use crate::error::{
Error, KeyMustBeStringSnafu, Result, TransformElementMustBeMapSnafu,
TransformFieldMustBeSetSnafu, TransformOnFailureInvalidValueSnafu, TransformTypeMustBeSetSnafu,
UnsupportedTypeInPipelineSnafu,
};
use crate::etl::field::Fields;
use crate::etl::processor::{yaml_bool, yaml_new_field, yaml_new_fields, yaml_string};
use crate::etl::transform::index::Index;
use crate::etl::value::{parse_str_type, parse_str_value};
use crate::etl::value::{Timestamp, Value};
const TRANSFORM_FIELD: &str = "field";
const TRANSFORM_FIELDS: &str = "fields";
@@ -128,61 +124,39 @@ impl TryFrom<&Vec<yaml_rust::Yaml>> for Transforms {
#[derive(Debug, Clone)]
pub struct Transform {
pub fields: Fields,
pub type_: ColumnDataType,
pub default: Option<ValueData>,
pub type_: Value,
pub default: Option<Value>,
pub index: Option<Index>,
pub tag: bool,
pub on_failure: Option<OnFailure>,
}
// valid types
// ColumnDataType::Int8
// ColumnDataType::Int16
// ColumnDataType::Int32
// ColumnDataType::Int64
// ColumnDataType::Uint8
// ColumnDataType::Uint16
// ColumnDataType::Uint32
// ColumnDataType::Uint64
// ColumnDataType::Float32
// ColumnDataType::Float64
// ColumnDataType::Boolean
// ColumnDataType::String
// ColumnDataType::TimestampNanosecond
// ColumnDataType::TimestampMicrosecond
// ColumnDataType::TimestampMillisecond
// ColumnDataType::TimestampSecond
// ColumnDataType::Binary
impl Default for Transform {
fn default() -> Self {
Transform {
fields: Fields::default(),
type_: Value::Null,
default: None,
index: None,
tag: false,
on_failure: None,
}
}
}
impl Transform {
pub(crate) fn get_default(&self) -> Option<&ValueData> {
pub(crate) fn get_default(&self) -> Option<&Value> {
self.default.as_ref()
}
pub(crate) fn get_type_matched_default_val(&self) -> Result<ValueData> {
get_default_for_type(&self.type_)
pub(crate) fn get_type_matched_default_val(&self) -> &Value {
&self.type_
}
pub(crate) fn get_default_value_when_data_is_none(&self) -> Option<ValueData> {
if is_timestamp_type(&self.type_) && self.index.is_some_and(|i| i == Index::Time) {
let now = Utc::now();
match self.type_ {
ColumnDataType::TimestampSecond => {
return Some(ValueData::TimestampSecondValue(now.timestamp()));
}
ColumnDataType::TimestampMillisecond => {
return Some(ValueData::TimestampMillisecondValue(now.timestamp_millis()));
}
ColumnDataType::TimestampMicrosecond => {
return Some(ValueData::TimestampMicrosecondValue(now.timestamp_micros()));
}
ColumnDataType::TimestampNanosecond => {
return Some(ValueData::TimestampNanosecondValue(
now.timestamp_nanos_opt()?,
));
}
_ => {}
}
pub(crate) fn get_default_value_when_data_is_none(&self) -> Option<Value> {
if matches!(self.type_, Value::Timestamp(_)) && self.index.is_some_and(|i| i == Index::Time)
{
return Some(Value::Timestamp(Timestamp::default()));
}
None
}
@@ -192,57 +166,17 @@ impl Transform {
}
}
fn is_timestamp_type(ty: &ColumnDataType) -> bool {
matches!(
ty,
ColumnDataType::TimestampSecond
| ColumnDataType::TimestampMillisecond
| ColumnDataType::TimestampMicrosecond
| ColumnDataType::TimestampNanosecond
)
}
fn get_default_for_type(ty: &ColumnDataType) -> Result<ValueData> {
let v = match ty {
ColumnDataType::Boolean => ValueData::BoolValue(false),
ColumnDataType::Int8 => ValueData::I8Value(0),
ColumnDataType::Int16 => ValueData::I16Value(0),
ColumnDataType::Int32 => ValueData::I32Value(0),
ColumnDataType::Int64 => ValueData::I64Value(0),
ColumnDataType::Uint8 => ValueData::U8Value(0),
ColumnDataType::Uint16 => ValueData::U16Value(0),
ColumnDataType::Uint32 => ValueData::U32Value(0),
ColumnDataType::Uint64 => ValueData::U64Value(0),
ColumnDataType::Float32 => ValueData::F32Value(0.0),
ColumnDataType::Float64 => ValueData::F64Value(0.0),
ColumnDataType::Binary => ValueData::BinaryValue(jsonb::Value::Null.to_vec()),
ColumnDataType::String => ValueData::StringValue(String::new()),
ColumnDataType::TimestampSecond => ValueData::TimestampSecondValue(0),
ColumnDataType::TimestampMillisecond => ValueData::TimestampMillisecondValue(0),
ColumnDataType::TimestampMicrosecond => ValueData::TimestampMicrosecondValue(0),
ColumnDataType::TimestampNanosecond => ValueData::TimestampNanosecondValue(0),
_ => UnsupportedTypeInPipelineSnafu {
ty: ty.as_str_name(),
}
.fail()?,
};
Ok(v)
}
impl TryFrom<&yaml_rust::yaml::Hash> for Transform {
type Error = Error;
fn try_from(hash: &yaml_rust::yaml::Hash) -> Result<Self> {
let mut fields = Fields::default();
let mut type_ = Value::Null;
let mut default = None;
let mut index = None;
let mut tag = false;
let mut on_failure = None;
let mut type_ = None;
for (k, v) in hash {
let key = k
.as_str()
@@ -258,7 +192,7 @@ impl TryFrom<&yaml_rust::yaml::Hash> for Transform {
TRANSFORM_TYPE => {
let t = yaml_string(v, TRANSFORM_TYPE)?;
type_ = Some(parse_str_type(&t)?);
type_ = Value::parse_str_type(&t)?;
}
TRANSFORM_INDEX => {
@@ -271,17 +205,7 @@ impl TryFrom<&yaml_rust::yaml::Hash> for Transform {
}
TRANSFORM_DEFAULT => {
default = match v {
yaml_rust::Yaml::Real(r) => Some(r.clone()),
yaml_rust::Yaml::Integer(i) => Some(i.to_string()),
yaml_rust::Yaml::String(s) => Some(s.clone()),
yaml_rust::Yaml::Boolean(b) => Some(b.to_string()),
yaml_rust::Yaml::Array(_)
| yaml_rust::Yaml::Hash(_)
| yaml_rust::Yaml::Alias(_)
| yaml_rust::Yaml::Null
| yaml_rust::Yaml::BadValue => None,
};
default = Some(Value::try_from(v)?);
}
TRANSFORM_ON_FAILURE => {
@@ -295,14 +219,23 @@ impl TryFrom<&yaml_rust::yaml::Hash> for Transform {
// ensure fields and type
ensure!(!fields.is_empty(), TransformFieldMustBeSetSnafu);
let type_ = type_.context(TransformTypeMustBeSetSnafu {
fields: format!("{:?}", fields),
})?;
ensure!(
type_ != Value::Null,
TransformTypeMustBeSetSnafu {
fields: format!("{:?}", fields)
}
);
let final_default = if let Some(default_value) = default {
let target = parse_str_value(&type_, &default_value)?;
on_failure = Some(OnFailure::Default);
Some(target)
match default_value {
// a default of null is treated as if no default was set
Value::Null => None,
_ => {
let target = type_.parse_str_value(default_value.to_str_value().as_str())?;
on_failure = Some(OnFailure::Default);
Some(target)
}
}
} else {
None
};


@@ -14,7 +14,6 @@
pub mod coerce;
use std::borrow::Cow;
use std::collections::{BTreeMap, HashSet};
use std::sync::Arc;
@@ -25,27 +24,26 @@ use api::v1::value::ValueData;
use api::v1::{ColumnDataType, ColumnDataTypeExtension, JsonTypeExtension, SemanticType};
use coerce::{coerce_columns, coerce_value};
use common_query::prelude::{GREPTIME_TIMESTAMP, GREPTIME_VALUE};
use common_telemetry::warn;
use greptime_proto::v1::{ColumnSchema, Row, Rows, Value as GreptimeValue};
use itertools::Itertools;
use jsonb::Number;
use once_cell::sync::OnceCell;
use serde_json::Number;
use session::context::Channel;
use snafu::OptionExt;
use vrl::prelude::VrlValueConvert;
use vrl::value::{KeyString, Value as VrlValue};
use crate::error::{
IdentifyPipelineColumnTypeMismatchSnafu, InvalidTimestampSnafu, ReachedMaxNestedLevelsSnafu,
Result, TimeIndexMustBeNonNullSnafu, TransformColumnNameMustBeUniqueSnafu,
TransformMultipleTimestampIndexSnafu, TransformTimestampIndexCountSnafu, ValueMustBeMapSnafu,
IdentifyPipelineColumnTypeMismatchSnafu, ReachedMaxNestedLevelsSnafu, Result,
TimeIndexMustBeNonNullSnafu, TransformColumnNameMustBeUniqueSnafu,
TransformMultipleTimestampIndexSnafu, TransformTimestampIndexCountSnafu,
UnsupportedNumberTypeSnafu, ValueMustBeMapSnafu,
};
use crate::etl::ctx_req::ContextOpt;
use crate::etl::field::{Field, Fields};
use crate::etl::transform::index::Index;
use crate::etl::transform::{Transform, Transforms};
use crate::etl::value::{Timestamp, Value};
use crate::etl::PipelineDocVersion;
use crate::{unwrap_or_continue_if_err, PipelineContext};
use crate::{unwrap_or_continue_if_err, Map, PipelineContext};
const DEFAULT_GREPTIME_TIMESTAMP_COLUMN: &str = "greptime_timestamp";
const DEFAULT_MAX_NESTED_LEVELS_FOR_JSON_FLATTENING: usize = 10;
@@ -135,7 +133,7 @@ impl GreptimePipelineParams {
impl GreptimeTransformer {
/// Add a default timestamp column to the transforms
fn add_greptime_timestamp_column(transforms: &mut Transforms) {
let type_ = ColumnDataType::TimestampNanosecond;
let type_ = Value::Timestamp(Timestamp::Nanosecond(0));
let default = None;
let transform = Transform {
@@ -222,7 +220,7 @@ impl GreptimeTransformer {
pub fn transform_mut(
&self,
pipeline_map: &mut VrlValue,
pipeline_map: &mut Value,
is_v1: bool,
) -> Result<Vec<GreptimeValue>> {
let mut values = vec![GreptimeValue { value_data: None }; self.schema.len()];
@@ -231,7 +229,6 @@ impl GreptimeTransformer {
for field in transform.fields.iter() {
let column_name = field.input_field();
let pipeline_map = pipeline_map.as_object_mut().context(ValueMustBeMapSnafu)?;
// let us keep `get` here to be compatible with v1
match pipeline_map.get(column_name) {
Some(v) => {
@@ -243,8 +240,11 @@ impl GreptimeTransformer {
let value_data = match transform.on_failure {
Some(crate::etl::transform::OnFailure::Default) => {
match transform.get_default() {
Some(default) => Some(default.clone()),
None => transform.get_default_value_when_data_is_none(),
Some(default) => coerce_value(default, transform)?,
None => match transform.get_default_value_when_data_is_none() {
Some(default) => coerce_value(&default, transform)?,
None => None,
},
}
}
Some(crate::etl::transform::OnFailure::Ignore) => None,
@@ -349,22 +349,63 @@ fn resolve_schema(
}
}
fn calc_ts(p_ctx: &PipelineContext, values: &VrlValue) -> Result<Option<ValueData>> {
fn resolve_number_schema(
n: Number,
column_name: String,
index: Option<usize>,
row: &mut Vec<GreptimeValue>,
schema_info: &mut SchemaInfo,
) -> Result<()> {
let (value, datatype, semantic_type) = if n.is_i64() {
(
ValueData::I64Value(n.as_i64().unwrap()),
ColumnDataType::Int64 as i32,
SemanticType::Field as i32,
)
} else if n.is_u64() {
(
ValueData::U64Value(n.as_u64().unwrap()),
ColumnDataType::Uint64 as i32,
SemanticType::Field as i32,
)
} else if n.is_f64() {
(
ValueData::F64Value(n.as_f64().unwrap()),
ColumnDataType::Float64 as i32,
SemanticType::Field as i32,
)
} else {
return UnsupportedNumberTypeSnafu { value: n }.fail();
};
resolve_schema(
index,
value,
ColumnSchema {
column_name,
datatype,
semantic_type,
datatype_extension: None,
options: None,
},
row,
schema_info,
)
}
fn calc_ts(p_ctx: &PipelineContext, values: &Value) -> Result<Option<ValueData>> {
match p_ctx.channel {
Channel::Prometheus => {
let ts = values
.as_object()
.and_then(|m| m.get(GREPTIME_TIMESTAMP))
.and_then(|ts| ts.try_into_i64().ok())
.unwrap_or_default();
Ok(Some(ValueData::TimestampMillisecondValue(ts)))
}
Channel::Prometheus => Ok(Some(ValueData::TimestampMillisecondValue(
values
.get(GREPTIME_TIMESTAMP)
.and_then(|v| v.as_i64())
.unwrap_or_default(),
))),
_ => {
let custom_ts = p_ctx.pipeline_definition.get_custom_ts();
match custom_ts {
Some(ts) => {
let ts_field = values.as_object().and_then(|m| m.get(ts.get_column_name()));
Some(ts.get_timestamp_value(ts_field)).transpose()
let ts_field = values.get(ts.get_column_name());
Some(ts.get_timestamp(ts_field)).transpose()
}
None => Ok(Some(ValueData::TimestampNanosecondValue(
chrono::Utc::now().timestamp_nanos_opt().unwrap_or_default(),
@@ -376,20 +417,18 @@ fn calc_ts(p_ctx: &PipelineContext, values: &VrlValue) -> Result<Option<ValueDat
pub(crate) fn values_to_row(
schema_info: &mut SchemaInfo,
values: VrlValue,
values: Value,
pipeline_ctx: &PipelineContext<'_>,
row: Option<Vec<GreptimeValue>>,
need_calc_ts: bool,
) -> Result<Row> {
let mut row: Vec<GreptimeValue> =
row.unwrap_or_else(|| Vec::with_capacity(schema_info.schema.len()));
let custom_ts = pipeline_ctx.pipeline_definition.get_custom_ts();
if need_calc_ts {
// calculate timestamp value based on the channel
let ts = calc_ts(pipeline_ctx, &values)?;
row.push(GreptimeValue { value_data: ts });
}
// calculate timestamp value based on the channel
let ts = calc_ts(pipeline_ctx, &values)?;
row.push(GreptimeValue { value_data: ts });
row.resize(schema_info.schema.len(), GreptimeValue { value_data: None });
@@ -398,20 +437,14 @@ pub(crate) fn values_to_row(
.as_ref()
.map_or(DEFAULT_GREPTIME_TIMESTAMP_COLUMN, |ts| ts.get_column_name());
let values = values.into_object().context(ValueMustBeMapSnafu)?;
let values = values.into_map().context(ValueMustBeMapSnafu)?;
for (column_name, value) in values {
if column_name.as_str() == ts_column_name {
if column_name == ts_column_name {
continue;
}
resolve_value(
value,
column_name.into(),
&mut row,
schema_info,
pipeline_ctx,
)?;
resolve_value(value, column_name, &mut row, schema_info, pipeline_ctx)?;
}
Ok(Row { values: row })
}
@@ -425,7 +458,7 @@ fn decide_semantic(p_ctx: &PipelineContext, column_name: &str) -> i32 {
}
fn resolve_value(
value: VrlValue,
value: Value,
column_name: String,
row: &mut Vec<GreptimeValue>,
schema_info: &mut SchemaInfo,
@@ -451,23 +484,27 @@ fn resolve_value(
};
match value {
VrlValue::Null => {}
Value::Null => {}
VrlValue::Integer(v) => {
Value::Int8(_) | Value::Int16(_) | Value::Int32(_) | Value::Int64(_) => {
// safe unwrap after type matched
let v = value.as_i64().unwrap();
resolve_simple_type(ValueData::I64Value(v), column_name, ColumnDataType::Int64)?;
}
VrlValue::Float(v) => {
Value::Uint8(_) | Value::Uint16(_) | Value::Uint32(_) | Value::Uint64(_) => {
// safe unwrap after type matched
resolve_simple_type(
ValueData::F64Value(v.into()),
column_name,
ColumnDataType::Float64,
)?;
let v = value.as_u64().unwrap();
resolve_simple_type(ValueData::U64Value(v), column_name, ColumnDataType::Uint64)?;
}
VrlValue::Boolean(v) => {
Value::Float32(_) | Value::Float64(_) => {
// safe unwrap after type matched
let v = value.as_f64().unwrap();
resolve_simple_type(ValueData::F64Value(v), column_name, ColumnDataType::Float64)?;
}
Value::Boolean(v) => {
resolve_simple_type(
ValueData::BoolValue(v),
column_name,
@@ -475,30 +512,15 @@ fn resolve_value(
)?;
}
VrlValue::Bytes(v) => {
Value::String(v) => {
resolve_simple_type(
ValueData::StringValue(String::from_utf8_lossy_owned(v.to_vec())),
ValueData::StringValue(v),
column_name,
ColumnDataType::String,
)?;
}
VrlValue::Regex(v) => {
warn!(
"Persisting regex value in the table, this should not happen, column_name: {}",
column_name
);
resolve_simple_type(
ValueData::StringValue(v.to_string()),
column_name,
ColumnDataType::String,
)?;
}
VrlValue::Timestamp(ts) => {
let ns = ts.timestamp_nanos_opt().context(InvalidTimestampSnafu {
input: ts.to_rfc3339(),
})?;
Value::Timestamp(Timestamp::Nanosecond(ns)) => {
resolve_simple_type(
ValueData::TimestampNanosecondValue(ns),
column_name,
@@ -506,8 +528,32 @@ fn resolve_value(
)?;
}
VrlValue::Array(_) | VrlValue::Object(_) => {
let data = vrl_value_to_jsonb_value(&value);
Value::Timestamp(Timestamp::Microsecond(us)) => {
resolve_simple_type(
ValueData::TimestampMicrosecondValue(us),
column_name,
ColumnDataType::TimestampMicrosecond,
)?;
}
Value::Timestamp(Timestamp::Millisecond(ms)) => {
resolve_simple_type(
ValueData::TimestampMillisecondValue(ms),
column_name,
ColumnDataType::TimestampMillisecond,
)?;
}
Value::Timestamp(Timestamp::Second(s)) => {
resolve_simple_type(
ValueData::TimestampSecondValue(s),
column_name,
ColumnDataType::TimestampSecond,
)?;
}
Value::Array(_) | Value::Map(_) => {
let data: jsonb::Value = value.into();
resolve_schema(
index,
ValueData::BinaryValue(data.to_vec()),
@@ -528,32 +574,8 @@ fn resolve_value(
Ok(())
}
fn vrl_value_to_jsonb_value<'a>(value: &'a VrlValue) -> jsonb::Value<'a> {
match value {
VrlValue::Bytes(bytes) => jsonb::Value::String(String::from_utf8_lossy(bytes)),
VrlValue::Regex(value_regex) => jsonb::Value::String(Cow::Borrowed(value_regex.as_str())),
VrlValue::Integer(i) => jsonb::Value::Number(Number::Int64(*i)),
VrlValue::Float(not_nan) => jsonb::Value::Number(Number::Float64(not_nan.into_inner())),
VrlValue::Boolean(b) => jsonb::Value::Bool(*b),
VrlValue::Timestamp(date_time) => jsonb::Value::String(Cow::Owned(date_time.to_rfc3339())),
VrlValue::Object(btree_map) => jsonb::Value::Object(
btree_map
.iter()
.map(|(key, value)| (key.to_string(), vrl_value_to_jsonb_value(value)))
.collect(),
),
VrlValue::Array(values) => jsonb::Value::Array(
values
.iter()
.map(|value| vrl_value_to_jsonb_value(value))
.collect(),
),
VrlValue::Null => jsonb::Value::Null,
}
}
fn identity_pipeline_inner(
pipeline_maps: Vec<VrlValue>,
pipeline_maps: Vec<Value>,
pipeline_ctx: &PipelineContext<'_>,
) -> Result<(SchemaInfo, HashMap<ContextOpt, Vec<Row>>)> {
let skip_error = pipeline_ctx.pipeline_param.skip_error();
@@ -563,7 +585,7 @@ fn identity_pipeline_inner(
// set time index column schema first
schema_info.schema.push(ColumnSchema {
column_name: custom_ts
.map(|ts| ts.get_column_name().to_string())
.map(|ts| ts.get_column_name().clone())
.unwrap_or_else(|| DEFAULT_GREPTIME_TIMESTAMP_COLUMN.to_string()),
datatype: custom_ts.map(|c| c.get_datatype()).unwrap_or_else(|| {
if pipeline_ctx.channel == Channel::Prometheus {
@@ -586,7 +608,7 @@ fn identity_pipeline_inner(
skip_error
);
let row = unwrap_or_continue_if_err!(
values_to_row(&mut schema_info, pipeline_map, pipeline_ctx, None, true),
values_to_row(&mut schema_info, pipeline_map, pipeline_ctx, None),
skip_error
);
@@ -618,7 +640,7 @@ fn identity_pipeline_inner(
/// 4. The pipeline will return an error if the same column's datatype is mismatched across records
/// 5. The pipeline will analyze the schema of each JSON record and merge them to get the final schema.
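///
/// For example (illustrative): ingesting the records {"a": 1} and {"a": 2, "b": "x"}
/// yields one merged schema containing columns `a`, `b` and the default
/// `greptime_timestamp` time index column.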
pub fn identity_pipeline(
array: Vec<VrlValue>,
array: Vec<Value>,
table: Option<Arc<table::Table>>,
pipeline_ctx: &PipelineContext<'_>,
) -> Result<HashMap<ContextOpt, Rows>> {
@@ -666,22 +688,22 @@ pub fn identity_pipeline(
///
/// The `max_nested_levels` parameter limits the nesting depth of the JSON object.
/// An error is returned if the nesting depth exceeds `max_nested_levels`.
pub fn flatten_object(object: VrlValue, max_nested_levels: usize) -> Result<VrlValue> {
pub fn flatten_object(object: Value, max_nested_levels: usize) -> Result<Value> {
let mut flattened = BTreeMap::new();
let object = object.into_object().context(ValueMustBeMapSnafu)?;
let object = object.into_map().context(ValueMustBeMapSnafu)?;
if !object.is_empty() {
// it will use recursion to flatten the object.
do_flatten_object(&mut flattened, None, object, 1, max_nested_levels)?;
}
Ok(VrlValue::Object(flattened))
Ok(Value::Map(Map { values: flattened }))
}
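// A minimal usage sketch (illustrative, not part of the original change): nested keys
// are joined with ".", so {"a": {"b": 1}} flattens to {"a.b": 1} when the depth stays
// within `max_nested_levels`.
#[cfg(test)]
mod flatten_object_sketch {
    use super::*;

    #[test]
    fn flattens_one_level_of_nesting() {
        let nested = Map::one("a", Value::Map(Map::one("b", Value::Int64(1))));
        let flat = flatten_object(Value::Map(nested), 3).unwrap();
        let Value::Map(flat) = flat else { panic!("expected a map") };
        assert_eq!(flat.get("a.b"), Some(&Value::Int64(1)));
    }
}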
fn do_flatten_object(
dest: &mut BTreeMap<KeyString, VrlValue>,
dest: &mut BTreeMap<String, Value>,
base: Option<&str>,
object: BTreeMap<KeyString, VrlValue>,
object: BTreeMap<String, Value>,
current_level: usize,
max_nested_levels: usize,
) -> Result<()> {
@@ -691,17 +713,14 @@ fn do_flatten_object(
}
for (key, value) in object {
let new_key = base.map_or_else(
|| key.clone(),
|base_key| format!("{base_key}.{key}").into(),
);
let new_key = base.map_or_else(|| key.clone(), |base_key| format!("{base_key}.{key}"));
match value {
VrlValue::Object(object) => {
Value::Map(object) => {
do_flatten_object(
dest,
Some(&new_key),
object,
object.values,
current_level + 1,
max_nested_levels,
)?;
@@ -721,6 +740,7 @@ mod tests {
use api::v1::SemanticType;
use super::*;
use crate::etl::{json_array_to_map, json_to_map};
use crate::{identity_pipeline, PipelineDefinition};
#[test]
@@ -732,7 +752,7 @@ mod tests {
Channel::Unknown,
);
{
let array = [
let array = vec![
serde_json::json!({
"woshinull": null,
"name": "Alice",
@@ -752,7 +772,7 @@ mod tests {
"gaga": "gaga"
}),
];
let array = array.iter().map(|v| v.into()).collect();
let array = json_array_to_map(array).unwrap();
let rows = identity_pipeline(array, None, &pipeline_ctx);
assert!(rows.is_err());
assert_eq!(
@@ -761,7 +781,7 @@ mod tests {
);
}
{
let array = [
let array = vec![
serde_json::json!({
"woshinull": null,
"name": "Alice",
@@ -781,8 +801,7 @@ mod tests {
"gaga": "gaga"
}),
];
let array = array.iter().map(|v| v.into()).collect();
let rows = identity_pipeline(array, None, &pipeline_ctx);
let rows = identity_pipeline(json_array_to_map(array).unwrap(), None, &pipeline_ctx);
assert!(rows.is_err());
assert_eq!(
rows.err().unwrap().to_string(),
@@ -790,7 +809,7 @@ mod tests {
);
}
{
let array = [
let array = vec![
serde_json::json!({
"woshinull": null,
"name": "Alice",
@@ -810,8 +829,7 @@ mod tests {
"gaga": "gaga"
}),
];
let array = array.iter().map(|v| v.into()).collect();
let rows = identity_pipeline(array, None, &pipeline_ctx);
let rows = identity_pipeline(json_array_to_map(array).unwrap(), None, &pipeline_ctx);
assert!(rows.is_ok());
let mut rows = rows.unwrap();
assert!(rows.len() == 1);
@@ -822,7 +840,7 @@ mod tests {
assert_eq!(8, rows.rows[1].values.len());
}
{
let array = [
let array = vec![
serde_json::json!({
"woshinull": null,
"name": "Alice",
@@ -844,23 +862,22 @@ mod tests {
];
let tag_column_names = ["name".to_string(), "address".to_string()];
let rows =
identity_pipeline_inner(array.iter().map(|v| v.into()).collect(), &pipeline_ctx)
.map(|(mut schema, mut rows)| {
for name in tag_column_names {
if let Some(index) = schema.index.get(&name) {
schema.schema[*index].semantic_type = SemanticType::Tag as i32;
}
let rows = identity_pipeline_inner(json_array_to_map(array).unwrap(), &pipeline_ctx)
.map(|(mut schema, mut rows)| {
for name in tag_column_names {
if let Some(index) = schema.index.get(&name) {
schema.schema[*index].semantic_type = SemanticType::Tag as i32;
}
}
assert!(rows.len() == 1);
let rows = rows.remove(&ContextOpt::default()).unwrap();
assert!(rows.len() == 1);
let rows = rows.remove(&ContextOpt::default()).unwrap();
Rows {
schema: schema.schema,
rows,
}
});
Rows {
schema: schema.schema,
rows,
}
});
assert!(rows.is_ok());
let rows = rows.unwrap();
@@ -957,8 +974,8 @@ mod tests {
];
for (input, max_depth, expected) in test_cases {
let input = input.into();
let expected = expected.map(|e| e.into());
let input = json_to_map(input).unwrap();
let expected = expected.map(|e| json_to_map(e).unwrap());
let flattened_object = flatten_object(input, max_depth).ok();
assert_eq!(flattened_object, expected);

View File

@@ -18,17 +18,58 @@ use api::v1::{ColumnDataTypeExtension, ColumnOptions, JsonTypeExtension};
use datatypes::schema::{FulltextOptions, SkippingIndexOptions};
use greptime_proto::v1::value::ValueData;
use greptime_proto::v1::{ColumnDataType, ColumnSchema, SemanticType};
use snafu::{OptionExt, ResultExt};
use vrl::value::Value as VrlValue;
use snafu::ResultExt;
use crate::error::{
CoerceIncompatibleTypesSnafu, CoerceJsonTypeToSnafu, CoerceStringToTypeSnafu,
CoerceTypeToJsonSnafu, CoerceUnsupportedEpochTypeSnafu, ColumnOptionsSnafu,
InvalidTimestampSnafu, Result, UnsupportedTypeInPipelineSnafu, VrlRegexValueSnafu,
CoerceTypeToJsonSnafu, CoerceUnsupportedEpochTypeSnafu, CoerceUnsupportedNullTypeSnafu,
CoerceUnsupportedNullTypeToSnafu, ColumnOptionsSnafu, Error, Result,
};
use crate::etl::transform::index::Index;
use crate::etl::transform::transformer::greptime::vrl_value_to_jsonb_value;
use crate::etl::transform::{OnFailure, Transform};
use crate::etl::value::{Timestamp, Value};
impl TryFrom<Value> for ValueData {
type Error = Error;
fn try_from(value: Value) -> Result<Self> {
match value {
Value::Null => CoerceUnsupportedNullTypeSnafu.fail(),
Value::Int8(v) => Ok(ValueData::I32Value(v as i32)),
Value::Int16(v) => Ok(ValueData::I32Value(v as i32)),
Value::Int32(v) => Ok(ValueData::I32Value(v)),
Value::Int64(v) => Ok(ValueData::I64Value(v)),
Value::Uint8(v) => Ok(ValueData::U32Value(v as u32)),
Value::Uint16(v) => Ok(ValueData::U32Value(v as u32)),
Value::Uint32(v) => Ok(ValueData::U32Value(v)),
Value::Uint64(v) => Ok(ValueData::U64Value(v)),
Value::Float32(v) => Ok(ValueData::F32Value(v)),
Value::Float64(v) => Ok(ValueData::F64Value(v)),
Value::Boolean(v) => Ok(ValueData::BoolValue(v)),
Value::String(v) => Ok(ValueData::StringValue(v)),
Value::Timestamp(Timestamp::Nanosecond(ns)) => {
Ok(ValueData::TimestampNanosecondValue(ns))
}
Value::Timestamp(Timestamp::Microsecond(us)) => {
Ok(ValueData::TimestampMicrosecondValue(us))
}
Value::Timestamp(Timestamp::Millisecond(ms)) => {
Ok(ValueData::TimestampMillisecondValue(ms))
}
Value::Timestamp(Timestamp::Second(s)) => Ok(ValueData::TimestampSecondValue(s)),
Value::Array(_) | Value::Map(_) => {
let data: jsonb::Value = value.into();
Ok(ValueData::BinaryValue(data.to_vec()))
}
}
}
}
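// A small sketch (illustrative) of the conversion above: narrow integers widen to the
// smallest gRPC value variant that can hold them, and `Value::Null` is rejected.
#[cfg(test)]
mod value_data_conversion_sketch {
    use super::*;

    #[test]
    fn widens_small_integers_and_rejects_null() {
        assert_eq!(
            ValueData::try_from(Value::Int8(1)).unwrap(),
            ValueData::I32Value(1)
        );
        assert_eq!(
            ValueData::try_from(Value::Uint16(2)).unwrap(),
            ValueData::U32Value(2)
        );
        assert!(ValueData::try_from(Value::Null).is_err());
    }
}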
pub(crate) fn coerce_columns(transform: &Transform) -> Result<Vec<ColumnSchema>> {
let mut columns = Vec::new();
@@ -36,21 +77,15 @@ pub(crate) fn coerce_columns(transform: &Transform) -> Result<Vec<ColumnSchema>>
for field in transform.fields.iter() {
let column_name = field.target_or_input_field().to_string();
let ext = if matches!(transform.type_, ColumnDataType::Binary) {
Some(ColumnDataTypeExtension {
type_ext: Some(TypeExt::JsonType(JsonTypeExtension::JsonBinary.into())),
})
} else {
None
};
let (datatype, datatype_extension) = coerce_type(transform)?;
let semantic_type = coerce_semantic_type(transform) as i32;
let column = ColumnSchema {
column_name,
datatype: transform.type_ as i32,
datatype: datatype as i32,
semantic_type,
datatype_extension: ext,
datatype_extension,
options: coerce_options(transform)?,
};
columns.push(column);
@@ -88,60 +123,113 @@ fn coerce_options(transform: &Transform) -> Result<Option<ColumnOptions>> {
}
}
pub(crate) fn coerce_value(val: &VrlValue, transform: &Transform) -> Result<Option<ValueData>> {
fn coerce_type(transform: &Transform) -> Result<(ColumnDataType, Option<ColumnDataTypeExtension>)> {
match transform.type_ {
Value::Int8(_) => Ok((ColumnDataType::Int8, None)),
Value::Int16(_) => Ok((ColumnDataType::Int16, None)),
Value::Int32(_) => Ok((ColumnDataType::Int32, None)),
Value::Int64(_) => Ok((ColumnDataType::Int64, None)),
Value::Uint8(_) => Ok((ColumnDataType::Uint8, None)),
Value::Uint16(_) => Ok((ColumnDataType::Uint16, None)),
Value::Uint32(_) => Ok((ColumnDataType::Uint32, None)),
Value::Uint64(_) => Ok((ColumnDataType::Uint64, None)),
Value::Float32(_) => Ok((ColumnDataType::Float32, None)),
Value::Float64(_) => Ok((ColumnDataType::Float64, None)),
Value::Boolean(_) => Ok((ColumnDataType::Boolean, None)),
Value::String(_) => Ok((ColumnDataType::String, None)),
Value::Timestamp(Timestamp::Nanosecond(_)) => {
Ok((ColumnDataType::TimestampNanosecond, None))
}
Value::Timestamp(Timestamp::Microsecond(_)) => {
Ok((ColumnDataType::TimestampMicrosecond, None))
}
Value::Timestamp(Timestamp::Millisecond(_)) => {
Ok((ColumnDataType::TimestampMillisecond, None))
}
Value::Timestamp(Timestamp::Second(_)) => Ok((ColumnDataType::TimestampSecond, None)),
Value::Array(_) | Value::Map(_) => Ok((
ColumnDataType::Binary,
Some(ColumnDataTypeExtension {
type_ext: Some(TypeExt::JsonType(JsonTypeExtension::JsonBinary.into())),
}),
)),
Value::Null => CoerceUnsupportedNullTypeToSnafu {
ty: transform.type_.to_str_type(),
}
.fail(),
}
}
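// For example (illustrative): a transform declared with type `Value::Int32(_)` maps to
// `ColumnDataType::Int32` with no extension, while `Value::Map(_)` or `Value::Array(_)`
// maps to `ColumnDataType::Binary` plus the JSON binary type extension.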
pub(crate) fn coerce_value(val: &Value, transform: &Transform) -> Result<Option<ValueData>> {
match val {
VrlValue::Null => Ok(None),
VrlValue::Integer(n) => coerce_i64_value(*n, transform),
VrlValue::Float(n) => coerce_f64_value(n.into_inner(), transform),
VrlValue::Boolean(b) => coerce_bool_value(*b, transform),
VrlValue::Bytes(b) => coerce_string_value(String::from_utf8_lossy(b).as_ref(), transform),
VrlValue::Timestamp(ts) => match transform.type_ {
ColumnDataType::TimestampNanosecond => Ok(Some(ValueData::TimestampNanosecondValue(
ts.timestamp_nanos_opt().context(InvalidTimestampSnafu {
input: ts.to_rfc3339(),
})?,
))),
ColumnDataType::TimestampMicrosecond => Ok(Some(ValueData::TimestampMicrosecondValue(
ts.timestamp_micros(),
))),
ColumnDataType::TimestampMillisecond => Ok(Some(ValueData::TimestampMillisecondValue(
ts.timestamp_millis(),
))),
ColumnDataType::TimestampSecond => {
Ok(Some(ValueData::TimestampSecondValue(ts.timestamp())))
}
Value::Null => Ok(None),
Value::Int8(n) => coerce_i64_value(*n as i64, transform),
Value::Int16(n) => coerce_i64_value(*n as i64, transform),
Value::Int32(n) => coerce_i64_value(*n as i64, transform),
Value::Int64(n) => coerce_i64_value(*n, transform),
Value::Uint8(n) => coerce_u64_value(*n as u64, transform),
Value::Uint16(n) => coerce_u64_value(*n as u64, transform),
Value::Uint32(n) => coerce_u64_value(*n as u64, transform),
Value::Uint64(n) => coerce_u64_value(*n, transform),
Value::Float32(n) => coerce_f64_value(*n as f64, transform),
Value::Float64(n) => coerce_f64_value(*n, transform),
Value::Boolean(b) => coerce_bool_value(*b, transform),
Value::String(s) => coerce_string_value(s, transform),
Value::Timestamp(input_timestamp) => match &transform.type_ {
Value::Timestamp(target_timestamp) => match target_timestamp {
Timestamp::Nanosecond(_) => Ok(Some(ValueData::TimestampNanosecondValue(
input_timestamp.timestamp_nanos(),
))),
Timestamp::Microsecond(_) => Ok(Some(ValueData::TimestampMicrosecondValue(
input_timestamp.timestamp_micros(),
))),
Timestamp::Millisecond(_) => Ok(Some(ValueData::TimestampMillisecondValue(
input_timestamp.timestamp_millis(),
))),
Timestamp::Second(_) => Ok(Some(ValueData::TimestampSecondValue(
input_timestamp.timestamp(),
))),
},
_ => CoerceIncompatibleTypesSnafu {
msg: "Timestamp can only be coerced to another type",
}
.fail(),
},
VrlValue::Array(_) | VrlValue::Object(_) => coerce_json_value(val, transform),
VrlValue::Regex(_) => VrlRegexValueSnafu.fail(),
Value::Array(_) | Value::Map(_) => coerce_json_value(val, transform),
}
}
fn coerce_bool_value(b: bool, transform: &Transform) -> Result<Option<ValueData>> {
let val = match transform.type_ {
ColumnDataType::Int8 => ValueData::I8Value(b as i32),
ColumnDataType::Int16 => ValueData::I16Value(b as i32),
ColumnDataType::Int32 => ValueData::I32Value(b as i32),
ColumnDataType::Int64 => ValueData::I64Value(b as i64),
Value::Int8(_) => ValueData::I8Value(b as i32),
Value::Int16(_) => ValueData::I16Value(b as i32),
Value::Int32(_) => ValueData::I32Value(b as i32),
Value::Int64(_) => ValueData::I64Value(b as i64),
ColumnDataType::Uint8 => ValueData::U8Value(b as u32),
ColumnDataType::Uint16 => ValueData::U16Value(b as u32),
ColumnDataType::Uint32 => ValueData::U32Value(b as u32),
ColumnDataType::Uint64 => ValueData::U64Value(b as u64),
Value::Uint8(_) => ValueData::U8Value(b as u32),
Value::Uint16(_) => ValueData::U16Value(b as u32),
Value::Uint32(_) => ValueData::U32Value(b as u32),
Value::Uint64(_) => ValueData::U64Value(b as u64),
ColumnDataType::Float32 => ValueData::F32Value(if b { 1.0 } else { 0.0 }),
ColumnDataType::Float64 => ValueData::F64Value(if b { 1.0 } else { 0.0 }),
Value::Float32(_) => ValueData::F32Value(if b { 1.0 } else { 0.0 }),
Value::Float64(_) => ValueData::F64Value(if b { 1.0 } else { 0.0 }),
ColumnDataType::Boolean => ValueData::BoolValue(b),
ColumnDataType::String => ValueData::StringValue(b.to_string()),
Value::Boolean(_) => ValueData::BoolValue(b),
Value::String(_) => ValueData::StringValue(b.to_string()),
ColumnDataType::TimestampNanosecond
| ColumnDataType::TimestampMicrosecond
| ColumnDataType::TimestampMillisecond
| ColumnDataType::TimestampSecond => match transform.on_failure {
Value::Timestamp(_) => match transform.on_failure {
Some(OnFailure::Ignore) => return Ok(None),
Some(OnFailure::Default) => {
return CoerceUnsupportedEpochTypeSnafu { ty: "Default" }.fail();
@@ -151,19 +239,14 @@ fn coerce_bool_value(b: bool, transform: &Transform) -> Result<Option<ValueData>
}
},
ColumnDataType::Binary => {
Value::Array(_) | Value::Map(_) => {
return CoerceJsonTypeToSnafu {
ty: transform.type_.as_str_name(),
ty: transform.type_.to_str_type(),
}
.fail()
}
_ => {
return UnsupportedTypeInPipelineSnafu {
ty: transform.type_.as_str_name(),
}
.fail()
}
Value::Null => return Ok(None),
};
Ok(Some(val))
@@ -171,35 +254,37 @@ fn coerce_bool_value(b: bool, transform: &Transform) -> Result<Option<ValueData>
fn coerce_i64_value(n: i64, transform: &Transform) -> Result<Option<ValueData>> {
let val = match &transform.type_ {
ColumnDataType::Int8 => ValueData::I8Value(n as i32),
ColumnDataType::Int16 => ValueData::I16Value(n as i32),
ColumnDataType::Int32 => ValueData::I32Value(n as i32),
ColumnDataType::Int64 => ValueData::I64Value(n),
Value::Int8(_) => ValueData::I8Value(n as i32),
Value::Int16(_) => ValueData::I16Value(n as i32),
Value::Int32(_) => ValueData::I32Value(n as i32),
Value::Int64(_) => ValueData::I64Value(n),
ColumnDataType::Uint8 => ValueData::U8Value(n as u32),
ColumnDataType::Uint16 => ValueData::U16Value(n as u32),
ColumnDataType::Uint32 => ValueData::U32Value(n as u32),
ColumnDataType::Uint64 => ValueData::U64Value(n as u64),
Value::Uint8(_) => ValueData::U8Value(n as u32),
Value::Uint16(_) => ValueData::U16Value(n as u32),
Value::Uint32(_) => ValueData::U32Value(n as u32),
Value::Uint64(_) => ValueData::U64Value(n as u64),
ColumnDataType::Float32 => ValueData::F32Value(n as f32),
ColumnDataType::Float64 => ValueData::F64Value(n as f64),
Value::Float32(_) => ValueData::F32Value(n as f32),
Value::Float64(_) => ValueData::F64Value(n as f64),
ColumnDataType::Boolean => ValueData::BoolValue(n != 0),
ColumnDataType::String => ValueData::StringValue(n.to_string()),
Value::Boolean(_) => ValueData::BoolValue(n != 0),
Value::String(_) => ValueData::StringValue(n.to_string()),
ColumnDataType::TimestampNanosecond => ValueData::TimestampNanosecondValue(n),
ColumnDataType::TimestampMicrosecond => ValueData::TimestampMicrosecondValue(n),
ColumnDataType::TimestampMillisecond => ValueData::TimestampMillisecondValue(n),
ColumnDataType::TimestampSecond => ValueData::TimestampSecondValue(n),
Value::Timestamp(unit) => match unit {
Timestamp::Nanosecond(_) => ValueData::TimestampNanosecondValue(n),
Timestamp::Microsecond(_) => ValueData::TimestampMicrosecondValue(n),
Timestamp::Millisecond(_) => ValueData::TimestampMillisecondValue(n),
Timestamp::Second(_) => ValueData::TimestampSecondValue(n),
},
ColumnDataType::Binary => {
Value::Array(_) | Value::Map(_) => {
return CoerceJsonTypeToSnafu {
ty: transform.type_.as_str_name(),
ty: transform.type_.to_str_type(),
}
.fail()
}
_ => return Ok(None),
Value::Null => return Ok(None),
};
Ok(Some(val))
@@ -207,35 +292,37 @@ fn coerce_i64_value(n: i64, transform: &Transform) -> Result<Option<ValueData>>
fn coerce_u64_value(n: u64, transform: &Transform) -> Result<Option<ValueData>> {
let val = match &transform.type_ {
ColumnDataType::Int8 => ValueData::I8Value(n as i32),
ColumnDataType::Int16 => ValueData::I16Value(n as i32),
ColumnDataType::Int32 => ValueData::I32Value(n as i32),
ColumnDataType::Int64 => ValueData::I64Value(n as i64),
Value::Int8(_) => ValueData::I8Value(n as i32),
Value::Int16(_) => ValueData::I16Value(n as i32),
Value::Int32(_) => ValueData::I32Value(n as i32),
Value::Int64(_) => ValueData::I64Value(n as i64),
ColumnDataType::Uint8 => ValueData::U8Value(n as u32),
ColumnDataType::Uint16 => ValueData::U16Value(n as u32),
ColumnDataType::Uint32 => ValueData::U32Value(n as u32),
ColumnDataType::Uint64 => ValueData::U64Value(n),
Value::Uint8(_) => ValueData::U8Value(n as u32),
Value::Uint16(_) => ValueData::U16Value(n as u32),
Value::Uint32(_) => ValueData::U32Value(n as u32),
Value::Uint64(_) => ValueData::U64Value(n),
ColumnDataType::Float32 => ValueData::F32Value(n as f32),
ColumnDataType::Float64 => ValueData::F64Value(n as f64),
Value::Float32(_) => ValueData::F32Value(n as f32),
Value::Float64(_) => ValueData::F64Value(n as f64),
ColumnDataType::Boolean => ValueData::BoolValue(n != 0),
ColumnDataType::String => ValueData::StringValue(n.to_string()),
Value::Boolean(_) => ValueData::BoolValue(n != 0),
Value::String(_) => ValueData::StringValue(n.to_string()),
ColumnDataType::TimestampNanosecond => ValueData::TimestampNanosecondValue(n as i64),
ColumnDataType::TimestampMicrosecond => ValueData::TimestampMicrosecondValue(n as i64),
ColumnDataType::TimestampMillisecond => ValueData::TimestampMillisecondValue(n as i64),
ColumnDataType::TimestampSecond => ValueData::TimestampSecondValue(n as i64),
Value::Timestamp(unit) => match unit {
Timestamp::Nanosecond(_) => ValueData::TimestampNanosecondValue(n as i64),
Timestamp::Microsecond(_) => ValueData::TimestampMicrosecondValue(n as i64),
Timestamp::Millisecond(_) => ValueData::TimestampMillisecondValue(n as i64),
Timestamp::Second(_) => ValueData::TimestampSecondValue(n as i64),
},
ColumnDataType::Binary => {
Value::Array(_) | Value::Map(_) => {
return CoerceJsonTypeToSnafu {
ty: transform.type_.as_str_name(),
ty: transform.type_.to_str_type(),
}
.fail()
}
_ => return Ok(None),
Value::Null => return Ok(None),
};
Ok(Some(val))
@@ -243,26 +330,23 @@ fn coerce_u64_value(n: u64, transform: &Transform) -> Result<Option<ValueData>>
fn coerce_f64_value(n: f64, transform: &Transform) -> Result<Option<ValueData>> {
let val = match transform.type_ {
ColumnDataType::Int8 => ValueData::I8Value(n as i32),
ColumnDataType::Int16 => ValueData::I16Value(n as i32),
ColumnDataType::Int32 => ValueData::I32Value(n as i32),
ColumnDataType::Int64 => ValueData::I64Value(n as i64),
Value::Int8(_) => ValueData::I8Value(n as i32),
Value::Int16(_) => ValueData::I16Value(n as i32),
Value::Int32(_) => ValueData::I32Value(n as i32),
Value::Int64(_) => ValueData::I64Value(n as i64),
ColumnDataType::Uint8 => ValueData::U8Value(n as u32),
ColumnDataType::Uint16 => ValueData::U16Value(n as u32),
ColumnDataType::Uint32 => ValueData::U32Value(n as u32),
ColumnDataType::Uint64 => ValueData::U64Value(n as u64),
Value::Uint8(_) => ValueData::U8Value(n as u32),
Value::Uint16(_) => ValueData::U16Value(n as u32),
Value::Uint32(_) => ValueData::U32Value(n as u32),
Value::Uint64(_) => ValueData::U64Value(n as u64),
ColumnDataType::Float32 => ValueData::F32Value(n as f32),
ColumnDataType::Float64 => ValueData::F64Value(n),
Value::Float32(_) => ValueData::F32Value(n as f32),
Value::Float64(_) => ValueData::F64Value(n),
ColumnDataType::Boolean => ValueData::BoolValue(n != 0.0),
ColumnDataType::String => ValueData::StringValue(n.to_string()),
Value::Boolean(_) => ValueData::BoolValue(n != 0.0),
Value::String(_) => ValueData::StringValue(n.to_string()),
ColumnDataType::TimestampNanosecond
| ColumnDataType::TimestampMicrosecond
| ColumnDataType::TimestampMillisecond
| ColumnDataType::TimestampSecond => match transform.on_failure {
Value::Timestamp(_) => match transform.on_failure {
Some(OnFailure::Ignore) => return Ok(None),
Some(OnFailure::Default) => {
return CoerceUnsupportedEpochTypeSnafu { ty: "Default" }.fail();
@@ -272,14 +356,14 @@ fn coerce_f64_value(n: f64, transform: &Transform) -> Result<Option<ValueData>>
}
},
ColumnDataType::Binary => {
Value::Array(_) | Value::Map(_) => {
return CoerceJsonTypeToSnafu {
ty: transform.type_.as_str_name(),
ty: transform.type_.to_str_type(),
}
.fail()
}
_ => return Ok(None),
Value::Null => return Ok(None),
};
Ok(Some(val))
@@ -292,12 +376,12 @@ macro_rules! coerce_string_value {
Err(_) => match $transform.on_failure {
Some(OnFailure::Ignore) => Ok(None),
Some(OnFailure::Default) => match $transform.get_default() {
Some(default) => Ok(Some(default.clone())),
None => $transform.get_type_matched_default_val().map(Some),
Some(default) => coerce_value(default, $transform),
None => coerce_value($transform.get_type_matched_default_val(), $transform),
},
None => CoerceStringToTypeSnafu {
s: $s,
ty: $transform.type_.as_str_name(),
ty: $transform.type_.to_str_type(),
}
.fail(),
},
@@ -305,85 +389,92 @@ macro_rules! coerce_string_value {
};
}
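// For example (illustrative): coercing the string "abc" to an Int32 column with
// `on_failure: default` and no explicit default yields `I32Value(0)`, while
// `on_failure: ignore` yields `None` (see the tests at the end of this file).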
fn coerce_string_value(s: &str, transform: &Transform) -> Result<Option<ValueData>> {
fn coerce_string_value(s: &String, transform: &Transform) -> Result<Option<ValueData>> {
match transform.type_ {
ColumnDataType::Int8 => {
Value::Int8(_) => {
coerce_string_value!(s, transform, i32, I8Value)
}
ColumnDataType::Int16 => {
Value::Int16(_) => {
coerce_string_value!(s, transform, i32, I16Value)
}
ColumnDataType::Int32 => {
Value::Int32(_) => {
coerce_string_value!(s, transform, i32, I32Value)
}
ColumnDataType::Int64 => {
Value::Int64(_) => {
coerce_string_value!(s, transform, i64, I64Value)
}
ColumnDataType::Uint8 => {
Value::Uint8(_) => {
coerce_string_value!(s, transform, u32, U8Value)
}
ColumnDataType::Uint16 => {
Value::Uint16(_) => {
coerce_string_value!(s, transform, u32, U16Value)
}
ColumnDataType::Uint32 => {
Value::Uint32(_) => {
coerce_string_value!(s, transform, u32, U32Value)
}
ColumnDataType::Uint64 => {
Value::Uint64(_) => {
coerce_string_value!(s, transform, u64, U64Value)
}
ColumnDataType::Float32 => {
Value::Float32(_) => {
coerce_string_value!(s, transform, f32, F32Value)
}
ColumnDataType::Float64 => {
Value::Float64(_) => {
coerce_string_value!(s, transform, f64, F64Value)
}
ColumnDataType::Boolean => {
Value::Boolean(_) => {
coerce_string_value!(s, transform, bool, BoolValue)
}
ColumnDataType::String => Ok(Some(ValueData::StringValue(s.to_string()))),
Value::String(_) => Ok(Some(ValueData::StringValue(s.to_string()))),
ColumnDataType::TimestampNanosecond
| ColumnDataType::TimestampMicrosecond
| ColumnDataType::TimestampMillisecond
| ColumnDataType::TimestampSecond => match transform.on_failure {
Value::Timestamp(_) => match transform.on_failure {
Some(OnFailure::Ignore) => Ok(None),
Some(OnFailure::Default) => CoerceUnsupportedEpochTypeSnafu { ty: "Default" }.fail(),
None => CoerceUnsupportedEpochTypeSnafu { ty: "String" }.fail(),
},
ColumnDataType::Binary => CoerceStringToTypeSnafu {
Value::Array(_) | Value::Map(_) => CoerceStringToTypeSnafu {
s,
ty: transform.type_.as_str_name(),
ty: transform.type_.to_str_type(),
}
.fail(),
_ => Ok(None),
Value::Null => Ok(None),
}
}
fn coerce_json_value(v: &VrlValue, transform: &Transform) -> Result<Option<ValueData>> {
fn coerce_json_value(v: &Value, transform: &Transform) -> Result<Option<ValueData>> {
match &transform.type_ {
ColumnDataType::Binary => (),
Value::Array(_) | Value::Map(_) => (),
t => {
return CoerceTypeToJsonSnafu {
ty: t.as_str_name(),
ty: t.to_str_type(),
}
.fail();
}
}
let data: jsonb::Value = vrl_value_to_jsonb_value(v);
Ok(Some(ValueData::BinaryValue(data.to_vec())))
match v {
Value::Map(_) => {
let data: jsonb::Value = v.into();
Ok(Some(ValueData::BinaryValue(data.to_vec())))
}
Value::Array(_) => {
let data: jsonb::Value = v.into();
Ok(Some(ValueData::BinaryValue(data.to_vec())))
}
_ => CoerceTypeToJsonSnafu {
ty: v.to_str_type(),
}
.fail(),
}
}
#[cfg(test)]
mod tests {
use vrl::prelude::Bytes;
use super::*;
use crate::etl::field::Fields;
@@ -391,7 +482,7 @@ mod tests {
fn test_coerce_string_without_on_failure() {
let transform = Transform {
fields: Fields::default(),
type_: ColumnDataType::Int32,
type_: Value::Int32(0),
default: None,
index: None,
on_failure: None,
@@ -400,14 +491,14 @@ mod tests {
// valid string
{
let val = VrlValue::Integer(123);
let val = Value::String("123".to_string());
let result = coerce_value(&val, &transform).unwrap();
assert_eq!(result, Some(ValueData::I32Value(123)));
}
// invalid string
{
let val = VrlValue::Bytes(Bytes::from("hello"));
let val = Value::String("hello".to_string());
let result = coerce_value(&val, &transform);
assert!(result.is_err());
}
@@ -417,14 +508,14 @@ mod tests {
fn test_coerce_string_with_on_failure_ignore() {
let transform = Transform {
fields: Fields::default(),
type_: ColumnDataType::Int32,
type_: Value::Int32(0),
default: None,
index: None,
on_failure: Some(OnFailure::Ignore),
tag: false,
};
let val = VrlValue::Bytes(Bytes::from("hello"));
let val = Value::String("hello".to_string());
let result = coerce_value(&val, &transform).unwrap();
assert_eq!(result, None);
}
@@ -433,7 +524,7 @@ mod tests {
fn test_coerce_string_with_on_failure_default() {
let mut transform = Transform {
fields: Fields::default(),
type_: ColumnDataType::Int32,
type_: Value::Int32(0),
default: None,
index: None,
on_failure: Some(OnFailure::Default),
@@ -442,15 +533,15 @@ mod tests {
// with no explicit default value
{
let val = VrlValue::Bytes(Bytes::from("hello"));
let val = Value::String("hello".to_string());
let result = coerce_value(&val, &transform).unwrap();
assert_eq!(result, Some(ValueData::I32Value(0)));
}
// with explicit default value
{
transform.default = Some(ValueData::I32Value(42));
let val = VrlValue::Bytes(Bytes::from("hello"));
transform.default = Some(Value::Int32(42));
let val = Value::String("hello".to_string());
let result = coerce_value(&val, &transform).unwrap();
assert_eq!(result, Some(ValueData::I32Value(42)));
}

File diff suppressed because it is too large

View File

@@ -0,0 +1,81 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use crate::error::{Error, Result};
use crate::etl::value::Value;
#[derive(Debug, Clone, PartialEq, Default)]
pub struct Array {
pub values: Vec<Value>,
}
impl Array {
pub fn new() -> Self {
Array { values: vec![] }
}
}
impl std::fmt::Display for Array {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
let values = self
.values
.iter()
.map(|v| v.to_string())
.collect::<Vec<String>>()
.join(", ");
write!(f, "[{}]", values)
}
}
impl std::ops::Deref for Array {
type Target = Vec<Value>;
fn deref(&self) -> &Self::Target {
&self.values
}
}
impl std::ops::DerefMut for Array {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.values
}
}
impl IntoIterator for Array {
type Item = Value;
type IntoIter = std::vec::IntoIter<Value>;
fn into_iter(self) -> Self::IntoIter {
self.values.into_iter()
}
}
impl From<Vec<Value>> for Array {
fn from(values: Vec<Value>) -> Self {
Array { values }
}
}
impl TryFrom<Vec<serde_json::Value>> for Array {
type Error = Error;
fn try_from(value: Vec<serde_json::Value>) -> Result<Self> {
let values = value
.into_iter()
.map(|v| v.try_into())
.collect::<Result<Vec<_>>>()?;
Ok(Array { values })
}
}
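// A brief usage sketch (illustrative): `Array` derefs to `Vec<Value>`, so the usual
// Vec APIs are available directly on it.
#[cfg(test)]
mod array_sketch {
    use super::*;

    #[test]
    fn builds_from_vec_and_derefs() {
        let arr: Array = vec![Value::Int64(1), Value::Int64(2)].into();
        assert_eq!(arr.len(), 2);
        assert_eq!(arr.values[0], Value::Int64(1));
    }
}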

View File

@@ -0,0 +1,70 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::BTreeMap;
use crate::etl::value::Value;
#[derive(Debug, Clone, PartialEq, Default)]
pub struct Map {
pub values: BTreeMap<String, Value>,
}
impl Map {
pub fn one(key: impl Into<String>, value: Value) -> Map {
let mut map = Map::default();
map.insert(key, value);
map
}
pub fn insert(&mut self, key: impl Into<String>, value: Value) {
self.values.insert(key.into(), value);
}
pub fn extend(&mut self, Map { values }: Map) {
self.values.extend(values);
}
}
impl From<BTreeMap<String, Value>> for Map {
fn from(values: BTreeMap<String, Value>) -> Self {
Self { values }
}
}
impl std::ops::Deref for Map {
type Target = BTreeMap<String, Value>;
fn deref(&self) -> &Self::Target {
&self.values
}
}
impl std::ops::DerefMut for Map {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.values
}
}
impl std::fmt::Display for Map {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
let values = self
.values
.iter()
.map(|(k, v)| format!("{}: {}", k, v))
.collect::<Vec<String>>()
.join(", ");
write!(f, "{{{}}}", values)
}
}
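// A brief usage sketch (illustrative): `Map` wraps a `BTreeMap<String, Value>` and
// derefs to it, so lookups and length checks work directly.
#[cfg(test)]
mod map_sketch {
    use super::*;

    #[test]
    fn one_then_insert() {
        let mut map = Map::one("host", Value::String("h1".to_string()));
        map.insert("code", Value::Int64(200));
        assert_eq!(map.len(), 2);
        assert_eq!(map.get("code"), Some(&Value::Int64(200)));
    }
}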

View File

@@ -0,0 +1,140 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use chrono::{DateTime, Utc};
use common_time::timestamp::TimeUnit;
#[derive(Debug, Clone, PartialEq)]
pub enum Timestamp {
Nanosecond(i64),
Microsecond(i64),
Millisecond(i64),
Second(i64),
}
pub(crate) const NANOSECOND_RESOLUTION: &str = "nanosecond";
pub(crate) const NANO_RESOLUTION: &str = "nano";
pub(crate) const NS_RESOLUTION: &str = "ns";
pub(crate) const MICROSECOND_RESOLUTION: &str = "microsecond";
pub(crate) const MICRO_RESOLUTION: &str = "micro";
pub(crate) const US_RESOLUTION: &str = "us";
pub(crate) const MILLISECOND_RESOLUTION: &str = "millisecond";
pub(crate) const MILLI_RESOLUTION: &str = "milli";
pub(crate) const MS_RESOLUTION: &str = "ms";
pub(crate) const SECOND_RESOLUTION: &str = "second";
pub(crate) const SEC_RESOLUTION: &str = "sec";
pub(crate) const S_RESOLUTION: &str = "s";
pub(crate) const VALID_RESOLUTIONS: [&str; 12] = [
NANOSECOND_RESOLUTION,
NANO_RESOLUTION,
NS_RESOLUTION,
MICROSECOND_RESOLUTION,
MICRO_RESOLUTION,
US_RESOLUTION,
MILLISECOND_RESOLUTION,
MILLI_RESOLUTION,
MS_RESOLUTION,
SECOND_RESOLUTION,
SEC_RESOLUTION,
S_RESOLUTION,
];
impl Timestamp {
pub(crate) fn timestamp_nanos(&self) -> i64 {
match self {
Timestamp::Nanosecond(v) => *v,
Timestamp::Microsecond(v) => *v * 1_000,
Timestamp::Millisecond(v) => *v * 1_000_000,
Timestamp::Second(v) => *v * 1_000_000_000,
}
}
pub(crate) fn timestamp_micros(&self) -> i64 {
match self {
Timestamp::Nanosecond(v) => *v / 1_000,
Timestamp::Microsecond(v) => *v,
Timestamp::Millisecond(v) => *v * 1_000,
Timestamp::Second(v) => *v * 1_000_000,
}
}
pub(crate) fn timestamp_millis(&self) -> i64 {
match self {
Timestamp::Nanosecond(v) => *v / 1_000_000,
Timestamp::Microsecond(v) => *v / 1_000,
Timestamp::Millisecond(v) => *v,
Timestamp::Second(v) => *v * 1_000,
}
}
pub(crate) fn timestamp(&self) -> i64 {
match self {
Timestamp::Nanosecond(v) => *v / 1_000_000_000,
Timestamp::Microsecond(v) => *v / 1_000_000,
Timestamp::Millisecond(v) => *v / 1_000,
Timestamp::Second(v) => *v,
}
}
pub(crate) fn to_unit(&self, unit: &TimeUnit) -> i64 {
match unit {
TimeUnit::Second => self.timestamp(),
TimeUnit::Millisecond => self.timestamp_millis(),
TimeUnit::Microsecond => self.timestamp_micros(),
TimeUnit::Nanosecond => self.timestamp_nanos(),
}
}
pub fn get_unit(&self) -> TimeUnit {
match self {
Timestamp::Nanosecond(_) => TimeUnit::Nanosecond,
Timestamp::Microsecond(_) => TimeUnit::Microsecond,
Timestamp::Millisecond(_) => TimeUnit::Millisecond,
Timestamp::Second(_) => TimeUnit::Second,
}
}
pub fn to_datetime(&self) -> Option<DateTime<Utc>> {
match self {
Timestamp::Nanosecond(v) => Some(DateTime::from_timestamp_nanos(*v)),
Timestamp::Microsecond(v) => DateTime::from_timestamp_micros(*v),
Timestamp::Millisecond(v) => DateTime::from_timestamp_millis(*v),
Timestamp::Second(v) => DateTime::from_timestamp(*v, 0),
}
}
pub fn from_datetime(dt: DateTime<Utc>) -> Option<Self> {
dt.timestamp_nanos_opt().map(Timestamp::Nanosecond)
}
}
impl Default for Timestamp {
fn default() -> Self {
Timestamp::Nanosecond(chrono::Utc::now().timestamp_nanos_opt().unwrap_or_default())
}
}
impl std::fmt::Display for Timestamp {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
let (value, resolution) = match self {
Timestamp::Nanosecond(v) => (v, NANOSECOND_RESOLUTION),
Timestamp::Microsecond(v) => (v, MICROSECOND_RESOLUTION),
Timestamp::Millisecond(v) => (v, MILLISECOND_RESOLUTION),
Timestamp::Second(v) => (v, SECOND_RESOLUTION),
};
write!(f, "{}, resolution: {}", value, resolution)
}
}
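// A worked sketch (illustrative) of the unit conversions above: values scale by powers
// of 1000, and downscaling truncates toward zero via integer division.
#[cfg(test)]
mod timestamp_sketch {
    use super::*;

    #[test]
    fn converts_between_units() {
        assert_eq!(Timestamp::Second(1).timestamp_millis(), 1_000);
        assert_eq!(Timestamp::Millisecond(1_500).timestamp(), 1);
        assert_eq!(Timestamp::Microsecond(2).timestamp_nanos(), 2_000);
    }
}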

View File

@@ -12,8 +12,6 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#![feature(string_from_utf8_lossy_owned)]
mod dispatcher;
pub mod error;
mod etl;
@@ -26,8 +24,10 @@ pub use etl::processor::Processor;
pub use etl::transform::transformer::greptime::{GreptimePipelineParams, SchemaInfo};
pub use etl::transform::transformer::identity_pipeline;
pub use etl::transform::GreptimeTransformer;
pub use etl::value::{Array, Map, Timestamp, Value};
pub use etl::{
parse, Content, DispatchedTo, Pipeline, PipelineExecOutput, TransformedOutput, TransformerMode,
json_array_to_map, json_to_map, parse, simd_json_array_to_map, simd_json_to_map, Content,
DispatchedTo, Pipeline, PipelineExecOutput, TransformedOutput, TransformerMode,
};
pub use manager::{
pipeline_operator, table, util, IdentityTimeIndex, PipelineContext, PipelineDefinition,

View File

@@ -16,22 +16,18 @@ use std::sync::Arc;
use api::v1::value::ValueData;
use api::v1::ColumnDataType;
use chrono::{DateTime, Utc};
use common_time::timestamp::TimeUnit;
use common_time::Timestamp;
use datatypes::timestamp::TimestampNanosecond;
use itertools::Itertools;
use session::context::Channel;
use snafu::{ensure, OptionExt};
use snafu::ensure;
use util::to_pipeline_version;
use vrl::value::Value as VrlValue;
use crate::error::{
CastTypeSnafu, InvalidCustomTimeIndexSnafu, InvalidTimestampSnafu, PipelineMissingSnafu, Result,
};
use crate::etl::value::{MS_RESOLUTION, NS_RESOLUTION, S_RESOLUTION, US_RESOLUTION};
use crate::error::{CastTypeSnafu, InvalidCustomTimeIndexSnafu, PipelineMissingSnafu, Result};
use crate::etl::value::time::{MS_RESOLUTION, NS_RESOLUTION, S_RESOLUTION, US_RESOLUTION};
use crate::table::PipelineTable;
use crate::{GreptimePipelineParams, Pipeline};
use crate::{GreptimePipelineParams, Pipeline, Value};
mod pipeline_cache;
pub mod pipeline_operator;
@@ -236,7 +232,7 @@ impl IdentityTimeIndex {
}
}
pub fn get_column_name(&self) -> &str {
pub fn get_column_name(&self) -> &String {
match self {
IdentityTimeIndex::Epoch(field, _, _) => field,
IdentityTimeIndex::DateStr(field, _, _) => field,
@@ -262,25 +258,25 @@ impl IdentityTimeIndex {
}
}
pub fn get_timestamp_value(&self, value: Option<&VrlValue>) -> Result<ValueData> {
pub fn get_timestamp(&self, value: Option<&Value>) -> Result<ValueData> {
match self {
IdentityTimeIndex::Epoch(_, unit, ignore_errors) => {
let v = match value {
Some(VrlValue::Integer(v)) => *v,
Some(VrlValue::Bytes(s)) => match String::from_utf8_lossy(s).parse::<i64>() {
Some(Value::Int32(v)) => *v as i64,
Some(Value::Int64(v)) => *v,
Some(Value::Uint32(v)) => *v as i64,
Some(Value::Uint64(v)) => *v as i64,
Some(Value::String(s)) => match s.parse::<i64>() {
Ok(v) => v,
Err(_) => {
return if_ignore_errors(
*ignore_errors,
*unit,
format!(
"failed to convert {} to number",
String::from_utf8_lossy(s)
),
format!("failed to convert {} to number", s),
)
}
},
Some(VrlValue::Timestamp(timestamp)) => datetime_utc_to_unit(timestamp, unit)?,
Some(Value::Timestamp(timestamp)) => timestamp.to_unit(unit),
Some(v) => {
return if_ignore_errors(
*ignore_errors,
@@ -296,7 +292,7 @@ impl IdentityTimeIndex {
}
IdentityTimeIndex::DateStr(_, format, ignore_errors) => {
let v = match value {
Some(VrlValue::Bytes(s)) => String::from_utf8_lossy(s),
Some(Value::String(s)) => s,
Some(v) => {
return if_ignore_errors(
*ignore_errors,
@@ -313,7 +309,7 @@ impl IdentityTimeIndex {
}
};
let timestamp = match chrono::DateTime::parse_from_str(&v, format) {
let timestamp = match chrono::DateTime::parse_from_str(v, format) {
Ok(ts) => ts,
Err(_) => {
return if_ignore_errors(
@@ -325,31 +321,13 @@ impl IdentityTimeIndex {
};
Ok(ValueData::TimestampNanosecondValue(
timestamp
.timestamp_nanos_opt()
.context(InvalidTimestampSnafu {
input: timestamp.to_rfc3339(),
})?,
timestamp.timestamp_nanos_opt().unwrap_or_default(),
))
}
}
}
}
fn datetime_utc_to_unit(timestamp: &DateTime<Utc>, unit: &TimeUnit) -> Result<i64> {
let ts = match unit {
TimeUnit::Nanosecond => timestamp
.timestamp_nanos_opt()
.context(InvalidTimestampSnafu {
input: timestamp.to_rfc3339(),
})?,
TimeUnit::Microsecond => timestamp.timestamp_micros(),
TimeUnit::Millisecond => timestamp.timestamp_millis(),
TimeUnit::Second => timestamp.timestamp(),
};
Ok(ts)
}
fn if_ignore_errors(ignore_errors: bool, unit: TimeUnit, msg: String) -> Result<ValueData> {
if ignore_errors {
Ok(time_unit_to_value_data(

View File

@@ -15,12 +15,12 @@
use dyn_fmt::AsStrFormatExt;
use regex::Regex;
use snafu::{ensure, OptionExt};
use vrl::value::Value as VrlValue;
use yaml_rust::Yaml;
use crate::error::{
Error, InvalidTableSuffixTemplateSnafu, RequiredTableSuffixTemplateSnafu, Result,
};
use crate::Value;
const REPLACE_KEY: &str = "{}";
@@ -47,16 +47,22 @@ pub(crate) struct TableSuffixTemplate {
}
impl TableSuffixTemplate {
pub fn apply(&self, val: &VrlValue) -> Option<String> {
let val = val.as_object()?;
pub fn apply(&self, val: &Value) -> Option<String> {
let values = self
.keys
.iter()
.filter_map(|key| {
let v = val.get(key.as_str())?;
let v = val.get(key)?;
match v {
VrlValue::Integer(v) => Some(v.to_string()),
VrlValue::Bytes(v) => Some(String::from_utf8_lossy_owned(v.to_vec())),
Value::Int8(v) => Some(v.to_string()),
Value::Int16(v) => Some(v.to_string()),
Value::Int32(v) => Some(v.to_string()),
Value::Int64(v) => Some(v.to_string()),
Value::Uint8(v) => Some(v.to_string()),
Value::Uint16(v) => Some(v.to_string()),
Value::Uint32(v) => Some(v.to_string()),
Value::Uint64(v) => Some(v.to_string()),
Value::String(v) => Some(v.clone()),
_ => None,
}
})

View File

@@ -13,12 +13,11 @@
// limitations under the License.
use greptime_proto::v1::{ColumnDataType, ColumnSchema, Rows, SemanticType};
use pipeline::{parse, setup_pipeline, Content, Pipeline, PipelineContext};
use vrl::value::Value as VrlValue;
use pipeline::{json_to_map, parse, setup_pipeline, Content, Pipeline, PipelineContext};
/// test util function to parse and execute pipeline
pub fn parse_and_exec(input_str: &str, pipeline_yaml: &str) -> Rows {
let input_value = serde_json::from_str::<VrlValue>(input_str).unwrap();
let input_value = serde_json::from_str::<serde_json::Value>(input_str).unwrap();
let yaml_content = Content::Yaml(pipeline_yaml);
let pipeline: Pipeline = parse(&yaml_content).expect("failed to parse pipeline");
@@ -33,19 +32,21 @@ pub fn parse_and_exec(input_str: &str, pipeline_yaml: &str) -> Rows {
let mut rows = Vec::new();
match input_value {
VrlValue::Array(array) => {
serde_json::Value::Array(array) => {
for value in array {
let intermediate_status = json_to_map(value).unwrap();
let row = pipeline
.exec_mut(value, &pipeline_ctx, &mut schema_info)
.exec_mut(intermediate_status, &pipeline_ctx, &mut schema_info)
.expect("failed to exec pipeline")
.into_transformed()
.expect("expect transformed result ");
rows.push(row.0);
}
}
VrlValue::Object(_) => {
serde_json::Value::Object(_) => {
let intermediate_status = json_to_map(input_value).unwrap();
let row = pipeline
.exec_mut(input_value, &pipeline_ctx, &mut schema_info)
.exec_mut(intermediate_status, &pipeline_ctx, &mut schema_info)
.expect("failed to exec pipeline")
.into_transformed()
.expect("expect transformed result ");

View File

@@ -16,7 +16,7 @@ mod common;
use greptime_proto::v1::value::ValueData::StringValue;
use greptime_proto::v1::{ColumnDataType, SemanticType};
use pipeline::{setup_pipeline, PipelineContext};
use pipeline::{json_to_map, setup_pipeline, PipelineContext};
fn make_string_column_schema(name: String) -> greptime_proto::v1::ColumnSchema {
common::make_column_schema(name, ColumnDataType::String, SemanticType::Field)
@@ -282,7 +282,7 @@ transform:
session::context::Channel::Unknown,
);
let result = input_value.into();
let result = json_to_map(input_value).unwrap();
let row = pipeline.exec_mut(result, &pipeline_ctx, &mut schema_info);

View File

@@ -20,7 +20,7 @@ use greptime_proto::v1::value::ValueData::{
U32Value, U64Value, U8Value,
};
use greptime_proto::v1::Value as GreptimeValue;
use pipeline::{parse, setup_pipeline, Content, Pipeline, PipelineContext};
use pipeline::{json_to_map, parse, setup_pipeline, Content, Pipeline, PipelineContext};
#[test]
fn test_complex_data() {
@@ -425,7 +425,7 @@ transform:
&pipeline_param,
session::context::Channel::Unknown,
);
let stats = input_value.into();
let stats = json_to_map(input_value).unwrap();
let row = pipeline
.exec_mut(stats, &pipeline_ctx, &mut schema_info)
@@ -500,7 +500,7 @@ transform:
session::context::Channel::Unknown,
);
let status = input_value.into();
let status = json_to_map(input_value).unwrap();
let row = pipeline
.exec_mut(status, &pipeline_ctx, &mut schema_info)
.unwrap()
@@ -615,7 +615,7 @@ transform:
session::context::Channel::Unknown,
);
let status = input_value.into();
let status = json_to_map(input_value).unwrap();
let row = pipeline
.exec_mut(status, &pipeline_ctx, &mut schema_info)
.unwrap()
@@ -687,7 +687,7 @@ transform:
session::context::Channel::Unknown,
);
let status = input_value.into();
let status = json_to_map(input_value).unwrap();
let row = pipeline
.exec_mut(status, &pipeline_ctx, &mut schema_info)
.unwrap()
@@ -733,7 +733,7 @@ transform:
session::context::Channel::Unknown,
);
let status = input_value.into();
let status = json_to_map(input_value).unwrap();
let row = pipeline
.exec_mut(status, &pipeline_ctx, &mut schema_info)
.unwrap()
@@ -798,7 +798,7 @@ transform:
session::context::Channel::Unknown,
);
let status = input_value.into();
let status = json_to_map(input_value).unwrap();
let row = pipeline
.exec_mut(status, &pipeline_ctx, &mut schema_info)
.unwrap()
@@ -845,7 +845,7 @@ transform:
session::context::Channel::Unknown,
);
let status = input_value.into();
let status = json_to_map(input_value).unwrap();
let row = pipeline
.exec_mut(status, &pipeline_ctx, &mut schema_info)
.unwrap()
@@ -913,7 +913,7 @@ transform:
session::context::Channel::Unknown,
);
let status = input_value1.into();
let status = json_to_map(input_value1).unwrap();
let dispatched_to = pipeline
.exec_mut(status, &pipeline_ctx, &mut schema_info)
.unwrap()
@@ -922,7 +922,7 @@ transform:
assert_eq!(dispatched_to.table_suffix, "http");
assert_eq!(dispatched_to.pipeline.unwrap(), "access_log_pipeline");
let status = input_value2.into();
let status = json_to_map(input_value2).unwrap();
let row = pipeline
.exec_mut(status, &pipeline_ctx, &mut schema_info)
.unwrap()
@@ -983,7 +983,7 @@ table_suffix: _${logger}
session::context::Channel::Unknown,
);
let status = input_value.into();
let status = json_to_map(input_value).unwrap();
let exec_re = pipeline
.exec_mut(status, &pipeline_ctx, &mut schema_info)
.unwrap();

View File

@@ -340,14 +340,7 @@ impl ExecutionPlan for RangeManipulateExec {
}
fn required_input_distribution(&self) -> Vec<Distribution> {
let input_requirement = self.input.required_input_distribution();
if input_requirement.is_empty() {
// if the input is EmptyMetric, its required_input_distribution() is empty so we can't
// use its input distribution.
vec![Distribution::UnspecifiedDistribution]
} else {
input_requirement
}
self.input.required_input_distribution()
}
fn with_new_children(

View File

@@ -71,7 +71,6 @@ store-api.workspace = true
substrait.workspace = true
table.workspace = true
tokio.workspace = true
tracing.workspace = true
unescaper = "0.1"
uuid.workspace = true

View File

@@ -155,23 +155,7 @@ struct PlanRewriter {
/// Partition columns of the table in current pass
partition_cols: Option<Vec<String>>,
column_requirements: HashSet<Column>,
/// Whether to expand on next call
/// This is used to handle the case where a plan is transformed but needs to be expanded from its
/// parent node. For example, an Aggregate plan is split into two parts between frontend and datanode, and needs
/// to be expanded from the parent node of the Aggregate plan.
expand_on_next_call: bool,
/// Expanding on next partial/conditional/transformed commutative plan
/// This is used to handle the case where a plan is transformed but we still
/// need to push down as many nodes as possible before the next partial/conditional/transformed commutative
/// plan. I.e.
/// ```
/// Limit:
/// Sort:
/// ```
/// where `Limit` is partial commutative, and `Sort` is conditional commutative.
/// In this case, we need to expand the `Limit` plan,
/// so that we can push down the `Sort` plan as much as possible.
expand_on_next_part_cond_trans_commutative: bool,
new_child_plan: Option<LogicalPlan>,
}
@@ -193,38 +177,15 @@ impl PlanRewriter {
{
return true;
}
if self.expand_on_next_call {
self.expand_on_next_call = false;
return true;
}
if self.expand_on_next_part_cond_trans_commutative {
let comm = Categorizer::check_plan(plan, self.partition_cols.clone());
match comm {
Commutativity::PartialCommutative => {
// a small difference is that for partial commutative, we still need to
// expand on the next call (so `Limit` can be pushed down)
self.expand_on_next_part_cond_trans_commutative = false;
self.expand_on_next_call = true;
}
Commutativity::ConditionalCommutative(_)
| Commutativity::TransformedCommutative { .. } => {
// for conditional commutative and transformed commutative, we can
// expand now
self.expand_on_next_part_cond_trans_commutative = false;
return true;
}
_ => (),
}
}
match Categorizer::check_plan(plan, self.partition_cols.clone()) {
Commutativity::Commutative => {}
Commutativity::PartialCommutative => {
if let Some(plan) = partial_commutative_transformer(plan) {
self.update_column_requirements(&plan);
self.expand_on_next_part_cond_trans_commutative = true;
self.stage.push(plan)
}
}
@@ -233,7 +194,6 @@ impl PlanRewriter {
&& let Some(plan) = transformer(plan)
{
self.update_column_requirements(&plan);
self.expand_on_next_part_cond_trans_commutative = true;
self.stage.push(plan)
}
}
@@ -242,7 +202,7 @@ impl PlanRewriter {
&& let Some(transformer_actions) = transformer(plan)
{
debug!(
"PlanRewriter: transformed plan: {:?}\n from {plan}",
"PlanRewriter: transformed plan: {:#?}\n from {plan}",
transformer_actions.extra_parent_plans
);
if let Some(last_stage) = transformer_actions.extra_parent_plans.last() {
@@ -266,10 +226,6 @@ impl PlanRewriter {
}
fn update_column_requirements(&mut self, plan: &LogicalPlan) {
debug!(
"PlanRewriter: update column requirements for plan: {plan}\n withcolumn_requirements: {:?}",
self.column_requirements
);
let mut container = HashSet::new();
for expr in plan.expressions() {
// this method won't fail
@@ -279,10 +235,6 @@ impl PlanRewriter {
for col in container {
self.column_requirements.insert(col);
}
debug!(
"PlanRewriter: updated column requirements: {:?}",
self.column_requirements
);
}
fn is_expanded(&self) -> bool {

View File

@@ -716,19 +716,17 @@ impl PromPlanner {
..
} = vs;
let matchers = self.preprocess_label_matchers(matchers, name)?;
if let Some(empty_plan) = self.setup_context().await? {
return Ok(empty_plan);
}
ensure!(!range.is_zero(), ZeroRangeSelectorSnafu);
let range_ms = range.as_millis() as _;
self.ctx.range = Some(range_ms);
// Some functions like rate may require special fields in the RangeManipulate plan
// so we can't skip RangeManipulate.
let normalize = match self.setup_context().await? {
Some(empty_plan) => empty_plan,
None => {
self.selector_to_series_normalize_plan(offset, matchers, true)
.await?
}
};
let normalize = self
.selector_to_series_normalize_plan(offset, matchers, true)
.await?;
let manipulate = RangeManipulate::new(
self.ctx.start,
self.ctx.end,

View File

@@ -125,10 +125,8 @@ tonic.workspace = true
tonic-reflection = "0.12"
tower = { workspace = true, features = ["full"] }
tower-http = { version = "0.6", features = ["full"] }
tracing.workspace = true
urlencoding = "2.1"
uuid.workspace = true
vrl.workspace = true
zstd.workspace = true
[target.'cfg(not(windows))'.dependencies]

View File

@@ -12,7 +12,6 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::BTreeMap;
use std::sync::Arc;
use std::time::Instant;
@@ -31,10 +30,9 @@ use pipeline::{
use serde_json::{json, Deserializer, Value};
use session::context::{Channel, QueryContext};
use snafu::{ensure, ResultExt};
use vrl::value::Value as VrlValue;
use crate::error::{
status_code_to_http_status, InvalidElasticsearchInputSnafu, ParseJsonSnafu,
status_code_to_http_status, InvalidElasticsearchInputSnafu, ParseJsonSnafu, PipelineSnafu,
Result as ServersResult,
};
use crate::http::event::{
@@ -289,8 +287,8 @@ fn parse_bulk_request(
msg_field: &Option<String>,
) -> ServersResult<Vec<PipelineIngestRequest>> {
// Read the ndjson payload and convert it to `Vec<Value>`. Return error if the input is not a valid JSON.
let values: Vec<VrlValue> = Deserializer::from_str(input)
.into_iter::<VrlValue>()
let values: Vec<Value> = Deserializer::from_str(input)
.into_iter::<Value>()
.collect::<Result<_, _>>()
.context(ParseJsonSnafu)?;
@@ -309,13 +307,12 @@ fn parse_bulk_request(
// For the Elasticsearch `_bulk` POST API, each chunk contains two objects:
// 1. The first object is the command; it should be `create` or `index`.
// 2. The second object is the document data.
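// e.g. an illustrative chunk (mirroring the tests below):
//   {"create": {"_index": "test"}}
//   {"foo1": "foo1_value", "bar1": "bar1_value"}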
while let Some(cmd) = values.next() {
while let Some(mut cmd) = values.next() {
// NOTE: Although the native Elasticsearch API supports upsert in the `index` command, we don't support changing any data in the `index` command; it's the same as the `create` command.
let mut cmd = cmd.into_object();
let index = if let Some(cmd) = cmd.as_mut().and_then(|c| c.remove("create")) {
get_index_from_cmd(cmd)?
} else if let Some(cmd) = cmd.as_mut().and_then(|c| c.remove("index")) {
get_index_from_cmd(cmd)?
let index = if let Some(cmd) = cmd.get_mut("create") {
get_index_from_cmd(cmd.take())?
} else if let Some(cmd) = cmd.get_mut("index") {
get_index_from_cmd(cmd.take())?
} else {
return InvalidElasticsearchInputSnafu {
reason: format!(
@@ -342,6 +339,7 @@ fn parse_bulk_request(
}
);
let log_value = pipeline::json_to_map(log_value).context(PipelineSnafu)?;
requests.push(PipelineIngestRequest {
table: index.unwrap_or_else(|| index_from_url.as_ref().unwrap().clone()),
values: vec![log_value],
@@ -359,50 +357,39 @@ fn parse_bulk_request(
}
// Get the index from the command. We use the index as the table name in GreptimeDB.
fn get_index_from_cmd(v: VrlValue) -> ServersResult<Option<String>> {
let Some(index) = v.into_object().and_then(|mut m| m.remove("_index")) else {
return Ok(None);
};
if let VrlValue::Bytes(index) = index {
Ok(Some(String::from_utf8_lossy(&index).to_string()))
} else {
// If the `_index` exists, it should be a string.
InvalidElasticsearchInputSnafu {
reason: "index is not a string in bulk request",
fn get_index_from_cmd(mut v: Value) -> ServersResult<Option<String>> {
if let Some(index) = v.get_mut("_index") {
if let Value::String(index) = index.take() {
Ok(Some(index))
} else {
// If the `_index` exists, it should be a string.
InvalidElasticsearchInputSnafu {
reason: "index is not a string in bulk request".to_string(),
}
.fail()
}
.fail()
} else {
Ok(None)
}
}
// If the msg_field is provided, fetch the value of the field from the document data.
// For example, if the `msg_field` is `message`, and the document data is `{"message":"hello"}`, the log value will be Value::String("hello").
fn get_log_value_from_msg_field(v: VrlValue, msg_field: &str) -> VrlValue {
let VrlValue::Object(mut m) = v else {
return v;
};
if let Some(message) = m.remove(msg_field) {
fn get_log_value_from_msg_field(mut v: Value, msg_field: &str) -> Value {
if let Some(message) = v.get_mut(msg_field) {
let message = message.take();
match message {
VrlValue::Bytes(bytes) => {
match serde_json::from_slice::<VrlValue>(&bytes) {
Ok(v) => v,
// If the message is not a valid JSON, return a map with the original message key and value.
Err(_) => {
let map = BTreeMap::from([(
msg_field.to_string().into(),
VrlValue::Bytes(bytes),
)]);
VrlValue::Object(map)
}
}
}
Value::String(s) => match serde_json::from_str::<Value>(&s) {
Ok(s) => s,
// If the message is not a valid JSON, return a map with the original message key and value.
Err(_) => json!({msg_field: s}),
},
// If the message is not a string, just use the original message as the log value.
_ => message,
}
} else {
// If the msg_field is not found, just use the original message as the log value.
VrlValue::Object(m)
v
}
}
@@ -427,14 +414,12 @@ mod tests {
PipelineIngestRequest {
table: "test".to_string(),
values: vec![
json!({"foo1": "foo1_value", "bar1": "bar1_value"}).into(),
pipeline::json_to_map(json!({"foo1": "foo1_value", "bar1": "bar1_value"})).unwrap(),
],
},
PipelineIngestRequest {
table: "test".to_string(),
values: vec![
json!({"foo2": "foo2_value", "bar2": "bar2_value"}).into(),
],
values: vec![pipeline::json_to_map(json!({"foo2": "foo2_value", "bar2": "bar2_value"})).unwrap()],
},
]),
),
@@ -451,15 +436,11 @@ mod tests {
Ok(vec![
PipelineIngestRequest {
table: "test".to_string(),
values: vec![
json!({"foo1": "foo1_value", "bar1": "bar1_value"}).into(),
],
values: vec![pipeline::json_to_map(json!({"foo1": "foo1_value", "bar1": "bar1_value"})).unwrap()],
},
PipelineIngestRequest {
table: "logs".to_string(),
values: vec![
json!({"foo2": "foo2_value", "bar2": "bar2_value"}).into(),
],
values: vec![pipeline::json_to_map(json!({"foo2": "foo2_value", "bar2": "bar2_value"})).unwrap()],
},
]),
),
@@ -476,15 +457,11 @@ mod tests {
Ok(vec![
PipelineIngestRequest {
table: "test".to_string(),
values: vec![
json!({"foo1": "foo1_value", "bar1": "bar1_value"}).into(),
],
values: vec![pipeline::json_to_map(json!({"foo1": "foo1_value", "bar1": "bar1_value"})).unwrap()],
},
PipelineIngestRequest {
table: "logs".to_string(),
values: vec![
json!({"foo2": "foo2_value", "bar2": "bar2_value"}).into(),
],
values: vec![pipeline::json_to_map(json!({"foo2": "foo2_value", "bar2": "bar2_value"})).unwrap()],
},
]),
),
@@ -500,9 +477,7 @@ mod tests {
Ok(vec![
PipelineIngestRequest {
table: "test".to_string(),
values: vec![
json!({"foo1": "foo1_value", "bar1": "bar1_value"}).into(),
],
values: vec![pipeline::json_to_map(json!({"foo1": "foo1_value", "bar1": "bar1_value"})).unwrap()],
},
]),
),
@@ -519,15 +494,11 @@ mod tests {
Ok(vec![
PipelineIngestRequest {
table: "test".to_string(),
values: vec![
json!({"foo1": "foo1_value", "bar1": "bar1_value"}).into(),
],
values: vec![pipeline::json_to_map(json!({"foo1": "foo1_value", "bar1": "bar1_value"})).unwrap()],
},
PipelineIngestRequest {
table: "test".to_string(),
values: vec![
json!({"foo2": "foo2_value", "bar2": "bar2_value"}).into(),
],
values: vec![pipeline::json_to_map(json!({"foo2": "foo2_value", "bar2": "bar2_value"})).unwrap()],
},
]),
),
@@ -545,13 +516,13 @@ mod tests {
PipelineIngestRequest {
table: "logs-generic-default".to_string(),
values: vec![
json!({"message": "172.16.0.1 - - [25/May/2024:20:19:37 +0000] \"GET /contact HTTP/1.1\" 404 162 \"-\" \"Mozilla/5.0 (iPhone; CPU iPhone OS 14_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0 Mobile/15E148 Safari/604.1\""}).into(),
pipeline::json_to_map(json!({"message": "172.16.0.1 - - [25/May/2024:20:19:37 +0000] \"GET /contact HTTP/1.1\" 404 162 \"-\" \"Mozilla/5.0 (iPhone; CPU iPhone OS 14_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0 Mobile/15E148 Safari/604.1\""})).unwrap(),
],
},
PipelineIngestRequest {
table: "logs-generic-default".to_string(),
values: vec![
json!({"message": "10.0.0.1 - - [25/May/2024:20:18:37 +0000] \"GET /images/logo.png HTTP/1.1\" 304 0 \"-\" \"Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:89.0) Gecko/20100101 Firefox/89.0\""}).into(),
pipeline::json_to_map(json!({"message": "10.0.0.1 - - [25/May/2024:20:18:37 +0000] \"GET /images/logo.png HTTP/1.1\" 304 0 \"-\" \"Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:89.0) Gecko/20100101 Firefox/89.0\""})).unwrap(),
],
},
]),


@@ -269,9 +269,7 @@ pub async fn write_system_metric_by_handler(
if let Err(e) = handler.write(requests, ctx.clone(), false).await {
error!(e; "report export metrics by handler failed");
} else {
crate::metrics::PROM_STORE_REMOTE_WRITE_SAMPLES
.with_label_values(&[ctx.get_db_string().as_str()])
.inc_by(samples as u64);
crate::metrics::PROM_STORE_REMOTE_WRITE_SAMPLES.inc_by(samples as u64);
}
}
}


@@ -35,14 +35,14 @@ use headers::ContentType;
use lazy_static::lazy_static;
use mime_guess::mime;
use pipeline::util::to_pipeline_version;
use pipeline::{ContextReq, GreptimePipelineParams, PipelineContext, PipelineDefinition};
use pipeline::{
ContextReq, GreptimePipelineParams, PipelineContext, PipelineDefinition, Value as PipelineValue,
};
use serde::{Deserialize, Serialize};
use serde_json::{json, Deserializer, Map, Value as JsonValue};
use session::context::{Channel, QueryContext, QueryContextRef};
use simd_json::Buffers;
use snafu::{ensure, OptionExt, ResultExt};
use strum::{EnumIter, IntoEnumIterator};
use vrl::value::{KeyString, Value as VrlValue};
use crate::error::{
status_code_to_http_status, Error, InvalidParameterSnafu, ParseJsonSnafu, PipelineSnafu, Result,
@@ -117,7 +117,7 @@ pub(crate) struct PipelineIngestRequest {
/// The table where the log data will be written to.
pub table: String,
/// The log data to be ingested.
pub values: Vec<VrlValue>,
pub values: Vec<PipelineValue>,
}
pub struct PipelineContent(String);
@@ -295,18 +295,18 @@ pub async fn delete_pipeline(
/// Transform NDJSON array into a single array
/// always return an array
fn transform_ndjson_array_factory(
values: impl IntoIterator<Item = Result<VrlValue, serde_json::Error>>,
values: impl IntoIterator<Item = Result<JsonValue, serde_json::Error>>,
ignore_error: bool,
) -> Result<Vec<VrlValue>> {
) -> Result<Vec<JsonValue>> {
values
.into_iter()
.try_fold(Vec::with_capacity(100), |mut acc_array, item| match item {
Ok(item_value) => {
match item_value {
VrlValue::Array(item_array) => {
JsonValue::Array(item_array) => {
acc_array.extend(item_array);
}
VrlValue::Object(_) => {
JsonValue::Object(_) => {
acc_array.push(item_value);
}
_ => {
@@ -331,7 +331,7 @@ fn transform_ndjson_array_factory(
/// Dryrun pipeline with given data
async fn dryrun_pipeline_inner(
value: Vec<VrlValue>,
value: Vec<PipelineValue>,
pipeline: Arc<pipeline::Pipeline>,
pipeline_handler: PipelineHandlerRef,
query_ctx: &QueryContextRef,
@@ -494,7 +494,7 @@ fn add_step_info_for_pipeline_dryrun_error(step_msg: &str, e: Error) -> Response
/// Parse the data with given content type
/// If the content type is invalid, return error
/// content type is one of application/json, text/plain, application/x-ndjson
fn parse_dryrun_data(data_type: String, data: String) -> Result<Vec<VrlValue>> {
fn parse_dryrun_data(data_type: String, data: String) -> Result<Vec<PipelineValue>> {
if let Ok(content_type) = ContentType::from_str(&data_type) {
extract_pipeline_value_by_content_type(content_type, Bytes::from(data), false)
} else {
@@ -741,15 +741,17 @@ impl<'a> TryFrom<&'a ContentType> for EventPayloadResolver<'a> {
}
impl EventPayloadResolver<'_> {
fn parse_payload(&self, payload: Bytes, ignore_errors: bool) -> Result<Vec<VrlValue>> {
fn parse_payload(&self, payload: Bytes, ignore_errors: bool) -> Result<Vec<PipelineValue>> {
match self.inner {
EventPayloadResolverInner::Json => transform_ndjson_array_factory(
Deserializer::from_slice(&payload).into_iter(),
ignore_errors,
),
EventPayloadResolverInner::Json => {
pipeline::json_array_to_map(transform_ndjson_array_factory(
Deserializer::from_slice(&payload).into_iter(),
ignore_errors,
)?)
.context(PipelineSnafu)
}
EventPayloadResolverInner::Ndjson => {
let mut result = Vec::with_capacity(1000);
let mut buffer = Buffers::new(1000);
for (index, line) in payload.lines().enumerate() {
let mut line = match line {
Ok(line) if !line.is_empty() => line,
@@ -766,10 +768,8 @@ impl EventPayloadResolver<'_> {
// simd_json, according to description, only de-escapes string at character level,
// like any other json parser. So it should be safe here.
if let Ok(v) = simd_json::serde::from_slice_with_buffers(
unsafe { line.as_bytes_mut() },
&mut buffer,
) {
if let Ok(v) = simd_json::to_owned_value(unsafe { line.as_bytes_mut() }) {
let v = pipeline::simd_json_to_map(v).context(PipelineSnafu)?;
result.push(v);
} else if !ignore_errors {
warn!("invalid JSON at index: {}, content: {:?}", index, line);
@@ -787,11 +787,8 @@ impl EventPayloadResolver<'_> {
.filter_map(|line| line.ok().filter(|line| !line.is_empty()))
.map(|line| {
let mut map = BTreeMap::new();
map.insert(
KeyString::from("message"),
VrlValue::Bytes(Bytes::from(line)),
);
VrlValue::Object(map)
map.insert("message".to_string(), PipelineValue::String(line));
PipelineValue::Map(map.into())
})
.collect::<Vec<_>>();
Ok(result)
@@ -804,7 +801,7 @@ fn extract_pipeline_value_by_content_type(
content_type: ContentType,
payload: Bytes,
ignore_errors: bool,
) -> Result<Vec<VrlValue>> {
) -> Result<Vec<PipelineValue>> {
EventPayloadResolver::try_from(&content_type).and_then(|resolver| {
resolver
.parse_payload(payload, ignore_errors)
@@ -902,37 +899,36 @@ pub struct LogState {
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_transform_ndjson() {
let s = "{\"a\": 1}\n{\"b\": 2}";
let a = serde_json::to_string(
&transform_ndjson_array_factory(Deserializer::from_str(s).into_iter(), false).unwrap(),
let a = JsonValue::Array(
transform_ndjson_array_factory(Deserializer::from_str(s).into_iter(), false).unwrap(),
)
.unwrap();
.to_string();
assert_eq!(a, "[{\"a\":1},{\"b\":2}]");
let s = "{\"a\": 1}";
let a = serde_json::to_string(
&transform_ndjson_array_factory(Deserializer::from_str(s).into_iter(), false).unwrap(),
let a = JsonValue::Array(
transform_ndjson_array_factory(Deserializer::from_str(s).into_iter(), false).unwrap(),
)
.unwrap();
.to_string();
assert_eq!(a, "[{\"a\":1}]");
let s = "[{\"a\": 1}]";
let a = serde_json::to_string(
&transform_ndjson_array_factory(Deserializer::from_str(s).into_iter(), false).unwrap(),
let a = JsonValue::Array(
transform_ndjson_array_factory(Deserializer::from_str(s).into_iter(), false).unwrap(),
)
.unwrap();
.to_string();
assert_eq!(a, "[{\"a\":1}]");
let s = "[{\"a\": 1}, {\"b\": 2}]";
let a = serde_json::to_string(
&transform_ndjson_array_factory(Deserializer::from_str(s).into_iter(), false).unwrap(),
let a = JsonValue::Array(
transform_ndjson_array_factory(Deserializer::from_str(s).into_iter(), false).unwrap(),
)
.unwrap();
.to_string();
assert_eq!(a, "[{\"a\":1},{\"b\":2}]");
}
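
Outside the test, the same flattening can be sketched as a free-standing helper over plain serde_json values (a sketch only; error handling is reduced to skipping bad items, and the function name is illustrative):

use serde_json::{Deserializer, Value};

// Sketch: stream-deserialize NDJSON and flatten any top-level arrays into one Vec,
// mirroring what transform_ndjson_array_factory produces for well-formed input.
fn flatten_ndjson_sketch(input: &str) -> Vec<Value> {
    let mut out = Vec::new();
    for item in Deserializer::from_str(input).into_iter::<Value>() {
        match item {
            Ok(Value::Array(arr)) => out.extend(arr),
            Ok(v @ Value::Object(_)) => out.push(v),
            _ => {} // scalars and parse errors are simply skipped in this sketch
        }
    }
    out
}

fn main() {
    let flat = flatten_ndjson_sketch("{\"a\": 1}\n{\"b\": 2}");
    assert_eq!(serde_json::to_string(&flat).unwrap(), "[{\"a\":1},{\"b\":2}]");
}
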
@@ -949,18 +945,21 @@ mod tests {
let fail_rest =
extract_pipeline_value_by_content_type(ContentType::json(), payload.clone(), true);
assert!(fail_rest.is_ok());
assert_eq!(fail_rest.unwrap(), vec![json!({"a": 1}).into()]);
assert_eq!(
fail_rest.unwrap(),
pipeline::json_array_to_map(vec![json!({"a": 1})]).unwrap()
);
let fail_only_wrong =
extract_pipeline_value_by_content_type(NDJSON_CONTENT_TYPE.clone(), payload, true);
assert!(fail_only_wrong.is_ok());
let mut map1 = BTreeMap::new();
map1.insert(KeyString::from("a"), VrlValue::Integer(1));
let map1 = VrlValue::Object(map1);
map1.insert("a".to_string(), PipelineValue::Uint64(1));
let map1 = PipelineValue::Map(map1.into());
let mut map2 = BTreeMap::new();
map2.insert(KeyString::from("c"), VrlValue::Integer(1));
let map2 = VrlValue::Object(map2);
map2.insert("c".to_string(), PipelineValue::Uint64(1));
let map2 = PipelineValue::Map(map2.into());
assert_eq!(fail_only_wrong.unwrap(), vec![map1, map2]);
}
}


@@ -25,7 +25,6 @@ use axum::extract::State;
use axum::Extension;
use axum_extra::TypedHeader;
use bytes::Bytes;
use chrono::DateTime;
use common_query::prelude::GREPTIME_TIMESTAMP;
use common_query::{Output, OutputData};
use common_telemetry::{error, warn};
@@ -40,7 +39,6 @@ use prost::Message;
use quoted_string::test_utils::TestSpec;
use session::context::{Channel, QueryContext};
use snafu::{ensure, OptionExt, ResultExt};
use vrl::value::{KeyString, Value as VrlValue};
use crate::error::{
DecodeOtlpRequestSnafu, InvalidLokiLabelsSnafu, InvalidLokiPayloadSnafu, ParseJsonSnafu,
@@ -199,7 +197,7 @@ pub async fn loki_ingest(
}
/// This is the holder of the loki lines parsed from json or protobuf.
/// The generic here is either [VrlValue] or [Vec<LabelPairAdapter>].
/// The generic here is either [serde_json::Value] or [Vec<LabelPairAdapter>].
/// Depending on the target destination, this can be converted to [LokiRawItem] or [LokiPipeline].
pub struct LokiMiddleItem<T> {
pub ts: i64,
@@ -220,7 +218,7 @@ pub struct LokiRawItem {
/// This is the line item prepared for the pipeline engine.
pub struct LokiPipeline {
pub map: VrlValue,
pub map: pipeline::Value,
}
/// This is the flow of the Loki ingestion.
@@ -257,7 +255,7 @@ pub struct LokiPipeline {
/// +------------------+ +---------------------+
fn extract_item<T>(content_type: ContentType, bytes: Bytes) -> Result<Box<dyn Iterator<Item = T>>>
where
LokiMiddleItem<VrlValue>: Into<T>,
LokiMiddleItem<serde_json::Value>: Into<T>,
LokiMiddleItem<Vec<LabelPairAdapter>>: Into<T>,
{
match content_type {
@@ -272,14 +270,15 @@ where
}
struct LokiJsonParser {
pub streams: VecDeque<VrlValue>,
pub streams: VecDeque<serde_json::Value>,
}
impl LokiJsonParser {
pub fn from_bytes(bytes: Bytes) -> Result<Self> {
let payload: VrlValue = serde_json::from_slice(bytes.as_ref()).context(ParseJsonSnafu)?;
let payload: serde_json::Value =
serde_json::from_slice(bytes.as_ref()).context(ParseJsonSnafu)?;
let VrlValue::Object(mut map) = payload else {
let serde_json::Value::Object(mut map) = payload else {
return InvalidLokiPayloadSnafu {
msg: "payload is not an object",
}
@@ -290,7 +289,7 @@ impl LokiJsonParser {
msg: "missing streams",
})?;
let VrlValue::Array(streams) = streams else {
let serde_json::Value::Array(streams) = streams else {
return InvalidLokiPayloadSnafu {
msg: "streams is not an array",
}
@@ -309,7 +308,7 @@ impl Iterator for LokiJsonParser {
fn next(&mut self) -> Option<Self::Item> {
while let Some(stream) = self.streams.pop_front() {
// get lines from the map
let VrlValue::Object(mut map) = stream else {
let serde_json::Value::Object(mut map) = stream else {
warn!("stream is not an object, {:?}", stream);
continue;
};
@@ -317,7 +316,7 @@ impl Iterator for LokiJsonParser {
warn!("missing lines on stream, {:?}", map);
continue;
};
let VrlValue::Array(lines) = lines else {
let serde_json::Value::Array(lines) = lines else {
warn!("lines is not an array, {:?}", lines);
continue;
};
@@ -326,15 +325,13 @@ impl Iterator for LokiJsonParser {
let labels = map
.remove(LABEL_KEY)
.and_then(|m| match m {
VrlValue::Object(labels) => Some(labels),
serde_json::Value::Object(labels) => Some(labels),
_ => None,
})
.map(|m| {
m.into_iter()
.filter_map(|(k, v)| match v {
VrlValue::Bytes(v) => {
Some((k.into(), String::from_utf8_lossy(&v).to_string()))
}
serde_json::Value::String(v) => Some((k, v)),
_ => None,
})
.collect::<BTreeMap<String, String>>()
@@ -350,16 +347,16 @@ impl Iterator for LokiJsonParser {
}
struct JsonStreamItem {
pub lines: VecDeque<VrlValue>,
pub lines: VecDeque<serde_json::Value>,
pub labels: Option<BTreeMap<String, String>>,
}
impl Iterator for JsonStreamItem {
type Item = LokiMiddleItem<VrlValue>;
type Item = LokiMiddleItem<serde_json::Value>;
fn next(&mut self) -> Option<Self::Item> {
while let Some(line) = self.lines.pop_front() {
let VrlValue::Array(line) = line else {
let serde_json::Value::Array(line) = line else {
warn!("line is not an array, {:?}", line);
continue;
};
@@ -367,11 +364,11 @@ impl Iterator for JsonStreamItem {
warn!("line is too short, {:?}", line);
continue;
}
let mut line: VecDeque<VrlValue> = line.into();
let mut line: VecDeque<serde_json::Value> = line.into();
// get ts
let ts = line.pop_front().and_then(|ts| match ts {
VrlValue::Bytes(ts) => String::from_utf8_lossy(&ts).parse::<i64>().ok(),
serde_json::Value::String(ts) => ts.parse::<i64>().ok(),
_ => {
warn!("missing or invalid timestamp, {:?}", ts);
None
@@ -382,7 +379,7 @@ impl Iterator for JsonStreamItem {
};
let line_text = line.pop_front().and_then(|l| match l {
VrlValue::Bytes(l) => Some(String::from_utf8_lossy(&l).to_string()),
serde_json::Value::String(l) => Some(l),
_ => {
warn!("missing or invalid line, {:?}", l);
None
@@ -405,8 +402,8 @@ impl Iterator for JsonStreamItem {
}
}
impl From<LokiMiddleItem<VrlValue>> for LokiRawItem {
fn from(val: LokiMiddleItem<VrlValue>) -> Self {
impl From<LokiMiddleItem<serde_json::Value>> for LokiRawItem {
fn from(val: LokiMiddleItem<serde_json::Value>) -> Self {
let LokiMiddleItem {
ts,
line,
@@ -416,16 +413,13 @@ impl From<LokiMiddleItem<VrlValue>> for LokiRawItem {
let structured_metadata = structured_metadata
.and_then(|m| match m {
VrlValue::Object(m) => Some(m),
serde_json::Value::Object(m) => Some(m),
_ => None,
})
.map(|m| {
m.into_iter()
.filter_map(|(k, v)| match v {
VrlValue::Bytes(bytes) => Some((
k.into(),
Value::String(String::from_utf8_lossy(&bytes).to_string().into()),
)),
serde_json::Value::String(v) => Some((k, Value::String(v.into()))),
_ => None,
})
.collect::<BTreeMap<String, Value>>()
@@ -442,8 +436,8 @@ impl From<LokiMiddleItem<VrlValue>> for LokiRawItem {
}
}
impl From<LokiMiddleItem<VrlValue>> for LokiPipeline {
fn from(value: LokiMiddleItem<VrlValue>) -> Self {
impl From<LokiMiddleItem<serde_json::Value>> for LokiPipeline {
fn from(value: LokiMiddleItem<serde_json::Value>) -> Self {
let LokiMiddleItem {
ts,
line,
@@ -453,33 +447,37 @@ impl From<LokiMiddleItem<VrlValue>> for LokiPipeline {
let mut map = BTreeMap::new();
map.insert(
KeyString::from(GREPTIME_TIMESTAMP),
VrlValue::Timestamp(DateTime::from_timestamp_nanos(ts)),
GREPTIME_TIMESTAMP.to_string(),
pipeline::Value::Timestamp(pipeline::Timestamp::Nanosecond(ts)),
);
map.insert(
KeyString::from(LOKI_LINE_COLUMN_NAME),
VrlValue::Bytes(line.into()),
LOKI_LINE_COLUMN_NAME.to_string(),
pipeline::Value::String(line),
);
if let Some(VrlValue::Object(m)) = structured_metadata {
if let Some(serde_json::Value::Object(m)) = structured_metadata {
for (k, v) in m {
map.insert(
KeyString::from(format!("{}{}", LOKI_PIPELINE_METADATA_PREFIX, k)),
v,
);
match pipeline::Value::try_from(v) {
Ok(v) => {
map.insert(format!("{}{}", LOKI_PIPELINE_METADATA_PREFIX, k), v);
}
Err(e) => {
warn!("not a valid value, {:?}", e);
}
}
}
}
if let Some(v) = labels {
v.into_iter().for_each(|(k, v)| {
map.insert(
KeyString::from(format!("{}{}", LOKI_PIPELINE_LABEL_PREFIX, k)),
VrlValue::Bytes(v.into()),
format!("{}{}", LOKI_PIPELINE_LABEL_PREFIX, k),
pipeline::Value::String(v),
);
});
}
LokiPipeline {
map: VrlValue::Object(map),
map: pipeline::Value::Map(pipeline::Map::from(map)),
}
}
}
@@ -586,12 +584,12 @@ impl From<LokiMiddleItem<Vec<LabelPairAdapter>>> for LokiPipeline {
let mut map = BTreeMap::new();
map.insert(
KeyString::from(GREPTIME_TIMESTAMP),
VrlValue::Timestamp(DateTime::from_timestamp_nanos(ts)),
GREPTIME_TIMESTAMP.to_string(),
pipeline::Value::Timestamp(pipeline::Timestamp::Nanosecond(ts)),
);
map.insert(
KeyString::from(LOKI_LINE_COLUMN_NAME),
VrlValue::Bytes(line.into()),
LOKI_LINE_COLUMN_NAME.to_string(),
pipeline::Value::String(line),
);
structured_metadata
@@ -599,22 +597,22 @@ impl From<LokiMiddleItem<Vec<LabelPairAdapter>>> for LokiPipeline {
.into_iter()
.for_each(|d| {
map.insert(
KeyString::from(format!("{}{}", LOKI_PIPELINE_METADATA_PREFIX, d.name)),
VrlValue::Bytes(d.value.into()),
format!("{}{}", LOKI_PIPELINE_METADATA_PREFIX, d.name),
pipeline::Value::String(d.value),
);
});
if let Some(v) = labels {
v.into_iter().for_each(|(k, v)| {
map.insert(
KeyString::from(format!("{}{}", LOKI_PIPELINE_LABEL_PREFIX, k)),
VrlValue::Bytes(v.into()),
format!("{}{}", LOKI_PIPELINE_LABEL_PREFIX, k),
pipeline::Value::String(v),
);
});
}
LokiPipeline {
map: VrlValue::Object(map),
map: pipeline::Value::Map(pipeline::Map::from(map)),
}
}
}
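
For orientation, a hedged sketch of the payload shape the JSON path above walks, following the Loki push API as generally documented (field names assumed, values invented; the parser's exact key constants are defined elsewhere in this file):

use serde_json::json;

fn main() {
    // Each stream carries a label map and "values" entries of
    // [nanosecond-timestamp-as-string, log line, optional structured metadata].
    let payload = json!({
        "streams": [{
            "stream": { "service": "billing", "level": "info" },
            "values": [
                ["1719846430000000000", "GET /contact 404", { "trace_id": "abc123" }],
                ["1719846431000000000", "GET /images/logo.png 304"]
            ]
        }]
    });
    println!("{payload}");
}
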


@@ -145,9 +145,7 @@ pub async fn remote_write(
let output = prom_store_handler
.write(reqs, temp_ctx, prom_store_with_metric_engine)
.await?;
crate::metrics::PROM_STORE_REMOTE_WRITE_SAMPLES
.with_label_values(&[db.as_str()])
.inc_by(cnt);
crate::metrics::PROM_STORE_REMOTE_WRITE_SAMPLES.inc_by(cnt);
cost += output.meta.cost;
}


@@ -23,10 +23,10 @@ use common_error::ext::ErrorExt;
use common_query::Output;
use datafusion_expr::LogicalPlan;
use log_query::LogQuery;
use pipeline::Value;
use query::parser::PromQuery;
use session::context::QueryContextRef;
use sql::statements::statement::Statement;
use vrl::value::Value;
/// SqlQueryInterceptor can track life cycle of a sql query and customize or
/// abort its execution at given point.


@@ -26,7 +26,6 @@ use prometheus::{
register_histogram, register_histogram_vec, register_int_counter, register_int_counter_vec,
register_int_gauge, Histogram, HistogramVec, IntCounter, IntCounterVec, IntGauge,
};
use session::context::QueryContext;
use tonic::body::BoxBody;
use tower::{Layer, Service};
@@ -49,13 +48,6 @@ pub(crate) const METRIC_SUCCESS_VALUE: &str = "success";
pub(crate) const METRIC_FAILURE_VALUE: &str = "failure";
lazy_static! {
pub static ref HTTP_REQUEST_COUNTER: IntCounterVec = register_int_counter_vec!(
"greptime_servers_http_request_counter",
"servers http request counter",
&[METRIC_METHOD_LABEL, METRIC_PATH_LABEL, METRIC_CODE_LABEL, METRIC_DB_LABEL]
).unwrap();
pub static ref METRIC_ERROR_COUNTER: IntCounterVec = register_int_counter_vec!(
"greptime_servers_error",
"servers error",
@@ -122,10 +114,9 @@ lazy_static! {
pub static ref METRIC_HTTP_PROM_STORE_CONVERT_ELAPSED: Histogram = METRIC_HTTP_PROM_STORE_CODEC_ELAPSED
.with_label_values(&["convert"]);
/// The samples count of Prometheus remote write.
pub static ref PROM_STORE_REMOTE_WRITE_SAMPLES: IntCounterVec = register_int_counter_vec!(
pub static ref PROM_STORE_REMOTE_WRITE_SAMPLES: IntCounter = register_int_counter!(
"greptime_servers_prometheus_remote_write_samples",
"frontend prometheus remote write samples",
&[METRIC_DB_LABEL]
"frontend prometheus remote write samples"
)
.unwrap();
/// Http prometheus read duration per database.
@@ -176,8 +167,6 @@ lazy_static! {
&[METRIC_DB_LABEL, METRIC_RESULT_LABEL]
)
.unwrap();
/// Count of logs ingested into Loki.
pub static ref METRIC_LOKI_LOGS_INGESTION_COUNTER: IntCounterVec = register_int_counter_vec!(
"greptime_servers_loki_logs_ingestion_counter",
"servers loki logs ingestion counter",
@@ -198,11 +187,9 @@ lazy_static! {
&[METRIC_DB_LABEL]
)
.unwrap();
/// Count of documents ingested into Elasticsearch logs.
pub static ref METRIC_ELASTICSEARCH_LOGS_DOCS_COUNT: IntCounterVec = register_int_counter_vec!(
"greptime_servers_elasticsearch_logs_docs_count",
"servers elasticsearch ingest logs docs count",
"servers elasticsearch logs docs count",
&[METRIC_DB_LABEL]
)
.unwrap();
@@ -265,13 +252,13 @@ lazy_static! {
pub static ref METRIC_HTTP_REQUESTS_TOTAL: IntCounterVec = register_int_counter_vec!(
"greptime_servers_http_requests_total",
"servers http requests total",
&[METRIC_METHOD_LABEL, METRIC_PATH_LABEL, METRIC_CODE_LABEL, METRIC_DB_LABEL]
&[METRIC_METHOD_LABEL, METRIC_PATH_LABEL, METRIC_CODE_LABEL]
)
.unwrap();
pub static ref METRIC_HTTP_REQUESTS_ELAPSED: HistogramVec = register_histogram_vec!(
"greptime_servers_http_requests_elapsed",
"servers http requests elapsed",
&[METRIC_METHOD_LABEL, METRIC_PATH_LABEL, METRIC_CODE_LABEL, METRIC_DB_LABEL],
&[METRIC_METHOD_LABEL, METRIC_PATH_LABEL, METRIC_CODE_LABEL],
vec![0.005, 0.01, 0.05, 0.1, 0.5, 1.0, 5.0, 10.0, 60.0, 300.0]
)
.unwrap();
@@ -365,26 +352,19 @@ where
pub(crate) async fn http_metrics_layer(req: Request, next: Next) -> impl IntoResponse {
let start = Instant::now();
let path = if let Some(matched_path) = req.extensions().get::<MatchedPath>() {
matched_path.as_str().to_string()
matched_path.as_str().to_owned()
} else {
req.uri().path().to_string()
req.uri().path().to_owned()
};
let method = req.method().clone();
let db = req
.extensions()
.get::<QueryContext>()
.map(|ctx| ctx.get_db_string())
.unwrap_or_else(|| "unknown".to_string());
let response = next.run(req).await;
let latency = start.elapsed().as_secs_f64();
let status = response.status();
let status = status.as_str();
let method_str = method.as_str();
let status = response.status().as_u16().to_string();
let method_str = method.to_string();
let labels = [method_str, &path, status, db.as_str()];
let labels = [method_str.as_str(), path.as_str(), status.as_str()];
METRIC_HTTP_REQUESTS_TOTAL.with_label_values(&labels).inc();
METRIC_HTTP_REQUESTS_ELAPSED
.with_label_values(&labels)
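
A minimal sketch of the call-site difference the counter changes above imply, against the prometheus crate's default registry (the metric names below are placeholders, not the real ones):

use prometheus::{register_int_counter, register_int_counter_vec};

fn main() {
    // Labelled counter, as before this change: one child per database label value.
    let per_db = register_int_counter_vec!(
        "sketch_remote_write_samples_per_db",
        "remote write samples, labelled by db",
        &["db"]
    )
    .unwrap();
    per_db.with_label_values(&["public"]).inc_by(42);

    // Plain counter, as after this change: no label lookup on the hot path.
    let global = register_int_counter!(
        "sketch_remote_write_samples",
        "remote write samples, global"
    )
    .unwrap();
    global.inc_by(42);
    assert_eq!(global.get(), 42);
}
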

View File

@@ -185,9 +185,6 @@ fn select_variable(query: &str, query_context: QueryContextRef) -> Option<Output
let value = match var_as[0] {
"session.time_zone" | "time_zone" => query_context.timezone().to_string(),
"system_time_zone" => system_timezone_name(),
"max_execution_time" | "session.max_execution_time" => {
query_context.query_timeout_as_millis().to_string()
}
_ => VAR_VALUES
.get(var_as[0])
.map(|v| v.to_string())


@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::{BTreeMap, HashMap as StdHashMap};
use std::collections::HashMap as StdHashMap;
use api::v1::column_data_type_extension::TypeExt;
use api::v1::value::ValueData;
@@ -20,7 +20,6 @@ use api::v1::{
ColumnDataType, ColumnDataTypeExtension, ColumnOptions, ColumnSchema, JsonTypeExtension, Row,
RowInsertRequest, Rows, SemanticType, Value as GreptimeValue,
};
use bytes::Bytes;
use jsonb::{Number as JsonbNumber, Value as JsonbValue};
use opentelemetry_proto::tonic::collector::logs::v1::ExportLogsServiceRequest;
use opentelemetry_proto::tonic::common::v1::{any_value, AnyValue, InstrumentationScope, KeyValue};
@@ -28,13 +27,13 @@ use opentelemetry_proto::tonic::logs::v1::{LogRecord, ResourceLogs, ScopeLogs};
use pipeline::{
ContextReq, GreptimePipelineParams, PipelineContext, PipelineWay, SchemaInfo, SelectInfo,
};
use serde_json::{Map, Value};
use session::context::QueryContextRef;
use snafu::ensure;
use vrl::prelude::NotNan;
use vrl::value::{KeyString, Value as VrlValue};
use snafu::{ensure, ResultExt};
use crate::error::{
IncompatibleSchemaSnafu, NotSupportedSnafu, Result, UnsupportedJsonDataTypeForTagSnafu,
IncompatibleSchemaSnafu, NotSupportedSnafu, PipelineSnafu, Result,
UnsupportedJsonDataTypeForTagSnafu,
};
use crate::http::event::PipelineIngestRequest;
use crate::otlp::trace::attributes::OtlpAnyValue;
@@ -70,7 +69,8 @@ pub async fn to_grpc_insert_requests(
Ok(ContextReq::default_opt_with_reqs(vec![insert_request]))
}
PipelineWay::Pipeline(pipeline_def) => {
let array = parse_export_logs_service_request(request);
let data = parse_export_logs_service_request(request);
let array = pipeline::json_array_to_map(data).context(PipelineSnafu)?;
let pipeline_ctx =
PipelineContext::new(&pipeline_def, &pipeline_params, query_ctx.channel());
@@ -93,16 +93,16 @@ pub async fn to_grpc_insert_requests(
}
}
fn scope_to_pipeline_value(scope: Option<InstrumentationScope>) -> (VrlValue, VrlValue, VrlValue) {
fn scope_to_pipeline_value(scope: Option<InstrumentationScope>) -> (Value, Value, Value) {
scope
.map(|x| {
(
VrlValue::Object(key_value_to_map(x.attributes)),
VrlValue::Bytes(x.version.into()),
VrlValue::Bytes(x.name.into()),
Value::Object(key_value_to_map(x.attributes)),
Value::String(x.version),
Value::String(x.name),
)
})
.unwrap_or((VrlValue::Null, VrlValue::Null, VrlValue::Null))
.unwrap_or((Value::Null, Value::Null, Value::Null))
}
fn scope_to_jsonb(
@@ -121,59 +121,53 @@ fn scope_to_jsonb(
fn log_to_pipeline_value(
log: LogRecord,
resource_schema_url: VrlValue,
resource_attr: VrlValue,
scope_schema_url: VrlValue,
scope_name: VrlValue,
scope_version: VrlValue,
scope_attrs: VrlValue,
) -> VrlValue {
let log_attrs = VrlValue::Object(key_value_to_map(log.attributes));
let mut map = BTreeMap::new();
resource_schema_url: Value,
resource_attr: Value,
scope_schema_url: Value,
scope_name: Value,
scope_version: Value,
scope_attrs: Value,
) -> Value {
let log_attrs = Value::Object(key_value_to_map(log.attributes));
let mut map = Map::new();
map.insert("Timestamp".to_string(), Value::from(log.time_unix_nano));
map.insert(
"Timestamp".into(),
VrlValue::Integer(log.time_unix_nano as i64),
);
map.insert(
"ObservedTimestamp".into(),
VrlValue::Integer(log.observed_time_unix_nano as i64),
"ObservedTimestamp".to_string(),
Value::from(log.observed_time_unix_nano),
);
// need to be convert to string
map.insert(
"TraceId".into(),
VrlValue::Bytes(bytes_to_hex_string(&log.trace_id).into()),
"TraceId".to_string(),
Value::String(bytes_to_hex_string(&log.trace_id)),
);
map.insert(
"SpanId".into(),
VrlValue::Bytes(bytes_to_hex_string(&log.span_id).into()),
"SpanId".to_string(),
Value::String(bytes_to_hex_string(&log.span_id)),
);
map.insert("TraceFlags".into(), VrlValue::Integer(log.flags as i64));
map.insert("TraceFlags".to_string(), Value::from(log.flags));
map.insert("SeverityText".to_string(), Value::String(log.severity_text));
map.insert(
"SeverityText".into(),
VrlValue::Bytes(log.severity_text.into()),
);
map.insert(
"SeverityNumber".into(),
VrlValue::Integer(log.severity_number as i64),
"SeverityNumber".to_string(),
Value::from(log.severity_number),
);
// need to be convert to string
map.insert(
"Body".into(),
"Body".to_string(),
log.body
.as_ref()
.map(|x| VrlValue::Bytes(log_body_to_string(x).into()))
.unwrap_or(VrlValue::Null),
.map(|x| Value::String(log_body_to_string(x)))
.unwrap_or(Value::Null),
);
map.insert("ResourceSchemaUrl".into(), resource_schema_url);
map.insert("ResourceSchemaUrl".to_string(), resource_schema_url);
map.insert("ResourceAttributes".into(), resource_attr);
map.insert("ScopeSchemaUrl".into(), scope_schema_url);
map.insert("ScopeName".into(), scope_name);
map.insert("ScopeVersion".into(), scope_version);
map.insert("ScopeAttributes".into(), scope_attrs);
map.insert("LogAttributes".into(), log_attrs);
VrlValue::Object(map)
map.insert("ResourceAttributes".to_string(), resource_attr);
map.insert("ScopeSchemaUrl".to_string(), scope_schema_url);
map.insert("ScopeName".to_string(), scope_name);
map.insert("ScopeVersion".to_string(), scope_version);
map.insert("ScopeAttributes".to_string(), scope_attrs);
map.insert("LogAttributes".to_string(), log_attrs);
Value::Object(map)
}
fn build_otlp_logs_identity_schema() -> Vec<ColumnSchema> {
@@ -628,18 +622,18 @@ fn merge_values(
/// transform otlp logs request to pipeline value
/// https://opentelemetry.io/docs/concepts/signals/logs/
fn parse_export_logs_service_request(request: ExportLogsServiceRequest) -> Vec<VrlValue> {
fn parse_export_logs_service_request(request: ExportLogsServiceRequest) -> Vec<Value> {
let mut result = Vec::new();
for r in request.resource_logs {
let resource_attr = r
.resource
.map(|x| VrlValue::Object(key_value_to_map(x.attributes)))
.unwrap_or(VrlValue::Null);
let resource_schema_url = VrlValue::Bytes(r.schema_url.into());
.map(|x| Value::Object(key_value_to_map(x.attributes)))
.unwrap_or(Value::Null);
let resource_schema_url = Value::String(r.schema_url);
for scope_logs in r.scope_logs {
let (scope_attrs, scope_version, scope_name) =
scope_to_pipeline_value(scope_logs.scope);
let scope_schema_url = VrlValue::Bytes(scope_logs.schema_url.into());
let scope_schema_url = Value::String(scope_logs.schema_url);
for log in scope_logs.log_records {
let value = log_to_pipeline_value(
log,
@@ -658,39 +652,43 @@ fn parse_export_logs_service_request(request: ExportLogsServiceRequest) -> Vec<V
}
// convert AnyValue to pipeline value
fn any_value_to_vrl_value(value: any_value::Value) -> VrlValue {
fn any_value_to_pipeline_value(value: any_value::Value) -> Value {
match value {
any_value::Value::StringValue(s) => VrlValue::Bytes(s.into()),
any_value::Value::IntValue(i) => VrlValue::Integer(i),
any_value::Value::DoubleValue(d) => VrlValue::Float(NotNan::new(d).unwrap()),
any_value::Value::BoolValue(b) => VrlValue::Boolean(b),
any_value::Value::ArrayValue(array_value) => {
let values = array_value
any_value::Value::StringValue(s) => Value::String(s),
any_value::Value::IntValue(i) => Value::from(i),
any_value::Value::DoubleValue(d) => Value::from(d),
any_value::Value::BoolValue(b) => Value::Bool(b),
any_value::Value::ArrayValue(a) => {
let values = a
.values
.into_iter()
.filter_map(|v| v.value.map(any_value_to_vrl_value))
.map(|v| match v.value {
Some(value) => any_value_to_pipeline_value(value),
None => Value::Null,
})
.collect();
VrlValue::Array(values)
Value::Array(values)
}
any_value::Value::KvlistValue(key_value_list) => {
VrlValue::Object(key_value_to_map(key_value_list.values))
any_value::Value::KvlistValue(kv) => {
let value = key_value_to_map(kv.values);
Value::Object(value)
}
any_value::Value::BytesValue(items) => VrlValue::Bytes(Bytes::from(items)),
any_value::Value::BytesValue(b) => Value::String(bytes_to_hex_string(&b)),
}
}
// convert otlp keyValue vec to map
fn key_value_to_map(key_values: Vec<KeyValue>) -> BTreeMap<KeyString, VrlValue> {
let mut map = BTreeMap::new();
fn key_value_to_map(key_values: Vec<KeyValue>) -> Map<String, Value> {
let mut map = Map::new();
for kv in key_values {
let value = match kv.value {
Some(value) => match value.value {
Some(value) => any_value_to_vrl_value(value),
None => VrlValue::Null,
Some(value) => any_value_to_pipeline_value(value),
None => Value::Null,
},
None => VrlValue::Null,
None => Value::Null,
};
map.insert(kv.key.into(), value);
map.insert(kv.key.clone(), value);
}
map
}


@@ -20,13 +20,12 @@ use api::greptime_proto;
use api::v1::{ColumnDataType, ColumnSchema, RowInsertRequest, Rows, SemanticType};
use common_time::timestamp::TimeUnit;
use pipeline::{
identity_pipeline, unwrap_or_continue_if_err, ContextReq, DispatchedTo, Pipeline,
PipelineContext, PipelineDefinition, PipelineExecOutput, SchemaInfo, TransformedOutput,
TransformerMode, GREPTIME_INTERNAL_IDENTITY_PIPELINE_NAME,
unwrap_or_continue_if_err, ContextReq, DispatchedTo, Pipeline, PipelineContext,
PipelineDefinition, PipelineExecOutput, SchemaInfo, TransformedOutput, TransformerMode, Value,
GREPTIME_INTERNAL_IDENTITY_PIPELINE_NAME,
};
use session::context::{Channel, QueryContextRef};
use snafu::ResultExt;
use vrl::value::Value as VrlValue;
use crate::error::{CatalogSnafu, PipelineSnafu, Result};
use crate::http::event::PipelineIngestRequest;
@@ -94,7 +93,7 @@ async fn run_identity_pipeline(
.await
.context(CatalogSnafu)?
};
identity_pipeline(data_array, table, pipeline_ctx)
pipeline::identity_pipeline(data_array, table, pipeline_ctx)
.map(|opt_map| ContextReq::from_opt_map(opt_map, table_name))
.context(PipelineSnafu)
}
@@ -118,7 +117,7 @@ async fn run_custom_pipeline(
} = pipeline_req;
let arr_len = pipeline_maps.len();
let mut transformed_map = HashMap::new();
let mut dispatched: BTreeMap<DispatchedTo, Vec<VrlValue>> = BTreeMap::new();
let mut dispatched: BTreeMap<DispatchedTo, Vec<Value>> = BTreeMap::new();
let mut schema_info = match pipeline.transformer() {
TransformerMode::GreptimeTransformer(greptime_transformer) => {


@@ -20,15 +20,12 @@ use std::slice;
use api::prom_store::remote::Sample;
use bytes::{Buf, Bytes};
use common_query::prelude::{GREPTIME_TIMESTAMP, GREPTIME_VALUE};
use common_telemetry::warn;
use pipeline::{ContextReq, GreptimePipelineParams, PipelineContext, PipelineDefinition};
use pipeline::{ContextReq, GreptimePipelineParams, PipelineContext, PipelineDefinition, Value};
use prost::encoding::message::merge;
use prost::encoding::{decode_key, decode_varint, WireType};
use prost::DecodeError;
use session::context::QueryContextRef;
use snafu::OptionExt;
use vrl::prelude::NotNan;
use vrl::value::{KeyString, Value as VrlValue};
use crate::error::InternalSnafu;
use crate::http::event::PipelineIngestRequest;
@@ -345,7 +342,7 @@ impl PromWriteRequest {
/// let's keep it that way for now.
pub struct PromSeriesProcessor {
pub(crate) use_pipeline: bool,
pub(crate) table_values: BTreeMap<String, Vec<VrlValue>>,
pub(crate) table_values: BTreeMap<String, Vec<Value>>,
// optional fields for pipeline
pub(crate) pipeline_handler: Option<PipelineHandlerRef>,
@@ -382,33 +379,29 @@ impl PromSeriesProcessor {
series: &mut PromTimeSeries,
prom_validation_mode: PromValidationMode,
) -> Result<(), DecodeError> {
let mut vec_pipeline_map = Vec::new();
let mut vec_pipeline_map: Vec<Value> = Vec::new();
let mut pipeline_map = BTreeMap::new();
for l in series.labels.iter() {
let name = prom_validation_mode.decode_string(&l.name)?;
let value = prom_validation_mode.decode_string(&l.value)?;
pipeline_map.insert(KeyString::from(name), VrlValue::Bytes(value.into()));
pipeline_map.insert(name, Value::String(value));
}
let one_sample = series.samples.len() == 1;
for s in series.samples.iter() {
let Ok(value) = NotNan::new(s.value) else {
warn!("Invalid float value: {}", s.value);
// skip NaN value
if s.value.is_nan() {
continue;
};
}
let timestamp = s.timestamp;
pipeline_map.insert(
KeyString::from(GREPTIME_TIMESTAMP),
VrlValue::Integer(timestamp),
);
pipeline_map.insert(KeyString::from(GREPTIME_VALUE), VrlValue::Float(value));
pipeline_map.insert(GREPTIME_TIMESTAMP.to_string(), Value::Int64(timestamp));
pipeline_map.insert(GREPTIME_VALUE.to_string(), Value::Float64(s.value));
if one_sample {
vec_pipeline_map.push(VrlValue::Object(pipeline_map));
vec_pipeline_map.push(Value::Map(pipeline_map.into()));
break;
} else {
vec_pipeline_map.push(VrlValue::Object(pipeline_map.clone()));
vec_pipeline_map.push(Value::Map(pipeline_map.clone().into()));
}
}


@@ -95,18 +95,6 @@ pub enum Error {
location: Location,
},
#[snafu(display(
"Not allowed to remove partition column {} from table {}",
column_name,
table_name
))]
RemovePartitionColumn {
column_name: String,
table_name: String,
#[snafu(implicit)]
location: Location,
},
#[snafu(display(
"Failed to build column descriptor for table: {}, column: {}",
table_name,
@@ -205,7 +193,6 @@ impl ErrorExt for Error {
StatusCode::EngineExecuteQuery
}
Error::RemoveColumnInIndex { .. }
| Error::RemovePartitionColumn { .. }
| Error::BuildColumnDescriptor { .. }
| Error::InvalidAlterRequest { .. } => StatusCode::InvalidArguments,
Error::CastDefaultValue { source, .. } => source.status_code(),


@@ -649,19 +649,10 @@ impl TableMeta {
msg: format!("Table {table_name} cannot add new columns {column_names:?}"),
})?;
let partition_key_indices = self
.partition_key_indices
.iter()
.map(|idx| table_schema.column_name_by_index(*idx))
// This unwrap is safe since we only add new columns.
.map(|name| new_schema.column_index_by_name(name).unwrap())
.collect();
// value_indices would be generated automatically.
let _ = meta_builder
.schema(Arc::new(new_schema))
.primary_key_indices(primary_key_indices)
.partition_key_indices(partition_key_indices);
.primary_key_indices(primary_key_indices);
Ok(meta_builder)
}
@@ -689,14 +680,6 @@ impl TableMeta {
}
);
ensure!(
!self.partition_key_indices.contains(&index),
error::RemovePartitionColumnSnafu {
column_name: *column_name,
table_name,
}
);
if let Some(ts_index) = timestamp_index {
// Not allowed to remove column in timestamp index.
ensure!(
@@ -746,18 +729,9 @@ impl TableMeta {
.map(|name| new_schema.column_index_by_name(name).unwrap())
.collect();
let partition_key_indices = self
.partition_key_indices
.iter()
.map(|idx| table_schema.column_name_by_index(*idx))
// This unwrap is safe since we don't allow removing a partition key column.
.map(|name| new_schema.column_index_by_name(name).unwrap())
.collect();
let _ = meta_builder
.schema(Arc::new(new_schema))
.primary_key_indices(primary_key_indices)
.partition_key_indices(partition_key_indices);
.primary_key_indices(primary_key_indices);
Ok(meta_builder)
}
@@ -1360,8 +1334,6 @@ fn unset_column_skipping_index_options(
#[cfg(test)]
mod tests {
use std::assert_matches::assert_matches;
use common_error::ext::ErrorExt;
use common_error::status_code::StatusCode;
use datatypes::data_type::ConcreteDataType;
@@ -1370,7 +1342,6 @@ mod tests {
};
use super::*;
use crate::Error;
/// Create a test schema with 3 columns: `[col1 int32, ts timestampmills, col2 int32]`.
fn new_test_schema() -> Schema {
@@ -1448,11 +1419,6 @@ mod tests {
ConcreteDataType::string_datatype(),
true,
);
let yet_another_field = ColumnSchema::new(
"yet_another_field_after_ts",
ConcreteDataType::int64_datatype(),
true,
);
let alter_kind = AlterKind::AddColumns {
columns: vec![
AddColumnRequest {
@@ -1469,14 +1435,6 @@ mod tests {
}),
add_if_not_exists: false,
},
AddColumnRequest {
column_schema: yet_another_field,
is_key: true,
location: Some(AddColumnLocation::After {
column_name: "ts".to_string(),
}),
add_if_not_exists: false,
},
],
};
@@ -1832,29 +1790,6 @@ mod tests {
assert_eq!(StatusCode::InvalidArguments, err.status_code());
}
#[test]
fn test_remove_partition_column() {
let schema = Arc::new(new_test_schema());
let meta = TableMetaBuilder::empty()
.schema(schema)
.primary_key_indices(vec![])
.partition_key_indices(vec![0])
.engine("engine")
.next_column_id(3)
.build()
.unwrap();
// Remove column in primary key.
let alter_kind = AlterKind::DropColumns {
names: vec![String::from("col1")],
};
let err = meta
.builder_with_alter_kind("my_table", &alter_kind)
.err()
.unwrap();
assert_matches!(err, Error::RemovePartitionColumn { .. });
}
#[test]
fn test_change_key_column_data_type() {
let schema = Arc::new(new_test_schema());
@@ -1920,8 +1855,6 @@ mod tests {
let meta = TableMetaBuilder::empty()
.schema(schema)
.primary_key_indices(vec![0])
// partition col: col1, col2
.partition_key_indices(vec![0, 2])
.engine("engine")
.next_column_id(3)
.build()
@@ -1937,19 +1870,11 @@ mod tests {
.map(|column_schema| column_schema.name.clone())
.collect();
assert_eq!(
&[
"my_tag_first", // primary key column
"col1", // partition column
"ts", // timestamp column
"yet_another_field_after_ts", // primary key column
"my_field_after_ts", // value column
"col2", // partition column
],
&["my_tag_first", "col1", "ts", "my_field_after_ts", "col2"],
&names[..]
);
assert_eq!(&[0, 1, 3], &new_meta.primary_key_indices[..]);
assert_eq!(&[2, 4, 5], &new_meta.value_indices[..]);
assert_eq!(&[1, 5], &new_meta.partition_key_indices[..]);
assert_eq!(&[0, 1], &new_meta.primary_key_indices[..]);
assert_eq!(&[2, 3, 4], &new_meta.value_indices[..]);
}
#[test]


@@ -126,7 +126,7 @@ impl PartialEq<Column> for ColumnEntry {
return false;
}
}
// TODO: Checks `semantic_type`
//TODO: Checks `semantic_type`
match semantic_type(&self.semantic_type) {
Some(SemanticType::Tag) => {
if !other


@@ -174,80 +174,3 @@ DROP TABLE t;
Affected Rows: 0
CREATE TABLE my_table (
a INT PRIMARY KEY,
b STRING,
ts TIMESTAMP TIME INDEX,
)
PARTITION ON COLUMNS (a) (
a < 1000,
a >= 1000 AND a < 2000,
a >= 2000
);
Affected Rows: 0
INSERT INTO my_table VALUES
(100, 'a', 1),
(200, 'b', 2),
(1100, 'c', 3),
(1200, 'd', 4),
(2000, 'e', 5),
(2100, 'f', 6),
(2200, 'g', 7),
(2400, 'h', 8);
Affected Rows: 8
SELECT * FROM my_table WHERE a > 100 order by a;
+------+---+-------------------------+
| a | b | ts |
+------+---+-------------------------+
| 200 | b | 1970-01-01T00:00:00.002 |
| 1100 | c | 1970-01-01T00:00:00.003 |
| 1200 | d | 1970-01-01T00:00:00.004 |
| 2000 | e | 1970-01-01T00:00:00.005 |
| 2100 | f | 1970-01-01T00:00:00.006 |
| 2200 | g | 1970-01-01T00:00:00.007 |
| 2400 | h | 1970-01-01T00:00:00.008 |
+------+---+-------------------------+
SELECT count(*) FROM my_table WHERE a > 100;
+----------+
| count(*) |
+----------+
| 7 |
+----------+
ALTER TABLE my_table ADD COLUMN c STRING FIRST;
Affected Rows: 0
SELECT * FROM my_table WHERE a > 100 order by a;
+---+------+---+-------------------------+
| c | a | b | ts |
+---+------+---+-------------------------+
| | 200 | b | 1970-01-01T00:00:00.002 |
| | 1100 | c | 1970-01-01T00:00:00.003 |
| | 1200 | d | 1970-01-01T00:00:00.004 |
| | 2000 | e | 1970-01-01T00:00:00.005 |
| | 2100 | f | 1970-01-01T00:00:00.006 |
| | 2200 | g | 1970-01-01T00:00:00.007 |
| | 2400 | h | 1970-01-01T00:00:00.008 |
+---+------+---+-------------------------+
SELECT count(*) FROM my_table WHERE a > 100;
+----------+
| count(*) |
+----------+
| 7 |
+----------+
DROP TABLE my_table;
Affected Rows: 0


@@ -47,36 +47,3 @@ SELECT * FROM t;
ALTER TABLE t ADD COLUMN x int xxx;
DROP TABLE t;
CREATE TABLE my_table (
a INT PRIMARY KEY,
b STRING,
ts TIMESTAMP TIME INDEX,
)
PARTITION ON COLUMNS (a) (
a < 1000,
a >= 1000 AND a < 2000,
a >= 2000
);
INSERT INTO my_table VALUES
(100, 'a', 1),
(200, 'b', 2),
(1100, 'c', 3),
(1200, 'd', 4),
(2000, 'e', 5),
(2100, 'f', 6),
(2200, 'g', 7),
(2400, 'h', 8);
SELECT * FROM my_table WHERE a > 100 order by a;
SELECT count(*) FROM my_table WHERE a > 100;
ALTER TABLE my_table ADD COLUMN c STRING FIRST;
SELECT * FROM my_table WHERE a > 100 order by a;
SELECT count(*) FROM my_table WHERE a > 100;
DROP TABLE my_table;

Some files were not shown because too many files have changed in this diff.