Mirror of https://github.com/GreptimeTeam/greptimedb.git
Synced 2025-12-25 23:49:58 +00:00

Compare commits: 19 commits (flow/faste… ... correct-re…)
| SHA1 |
|---|
| afc3f88240 |
| e5e10fd362 |
| 104d607b3f |
| 93e3a04aa8 |
| c1847e6b6a |
| d258739c26 |
| 914086668d |
| 01a8ad1304 |
| 1594859957 |
| 351a77a2e5 |
| 7723cba7da |
| dd7da3d2c2 |
| ffe0da0405 |
| f2c7b09825 |
| 3583b3204f |
| fea8bc5ee7 |
| 40363bfc0f |
| 85c0136619 |
| b70d998596 |
Cargo.lock (generated): 57 changed lines
@@ -2531,6 +2531,7 @@ dependencies = [
"tokio",
"tokio-postgres",
"tonic 0.12.3",
"tracing",
"typetag",
"uuid",
]

@@ -2995,9 +2996,9 @@ dependencies = [

[[package]]
name = "crc"
version = "3.2.1"
version = "3.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "69e6e4d7b33a94f0991c26729976b10ebde1d34c3ee82408fb536164fa10d636"
checksum = "9710d3b3739c2e349eb44fe848ad0b7c8cb1e42bd87ee49371df2f7acaf3e675"
dependencies = [
"crc-catalog",
]

@@ -3806,6 +3807,7 @@ dependencies = [
"tokio",
"toml 0.8.19",
"tonic 0.12.3",
"tracing",
]

[[package]]
@@ -3828,7 +3830,7 @@ dependencies = [
"jsonb",
"num",
"num-traits",
"ordered-float 3.9.2",
"ordered-float 4.3.0",
"paste",
"serde",
"serde_json",

@@ -4149,12 +4151,16 @@ dependencies = [

[[package]]
name = "domain"
version = "0.10.4"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4c84070523f8ba0f9127ff156920f27eb27b302b425efe60bf5f41ec244d1c60"
checksum = "a11dd7f04a6a6d2aea0153c6e31f5ea7af8b2efdf52cdaeea7a9a592c7fefef9"
dependencies = [
"bumpalo",
"bytes",
"domain-macros",
"futures-util",
"hashbrown 0.14.5",
"log",
"moka",
"octseq",
"rand 0.8.5",

@@ -4165,6 +4171,17 @@ dependencies = [
"tracing",
]

[[package]]
name = "domain-macros"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0e197fdfd2cdb5fdeb7f8ddcf3aed5d5d04ecde2890d448b14ffb716f7376b70"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.100",
]

[[package]]
name = "dotenv"
version = "0.15.0"

@@ -4794,6 +4811,7 @@ dependencies = [
"toml 0.8.19",
"tonic 0.12.3",
"tower 0.5.2",
"tracing",
"uuid",
]

@@ -7297,6 +7315,7 @@ dependencies = [
"snafu 0.8.5",
"store-api",
"tokio",
"tracing",
]

[[package]]
@@ -7410,6 +7429,7 @@ dependencies = [
"datafusion-expr",
"datatypes",
"dotenv",
"either",
"futures",
"humantime-serde",
"index",

@@ -7445,6 +7465,7 @@ dependencies = [
"tokio-stream",
"tokio-util",
"toml 0.8.19",
"tracing",
"uuid",
]

@@ -8524,6 +8545,7 @@ dependencies = [
"tokio",
"tokio-util",
"tonic 0.12.3",
"tracing",
]

[[package]]
@@ -8560,17 +8582,6 @@ dependencies = [
"num-traits",
]

[[package]]
name = "ordered-float"
version = "3.9.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f1e1c390732d15f1d48471625cd92d154e66db2c56645e29a9cd26f4699f72dc"
dependencies = [
"num-traits",
"rand 0.8.5",
"serde",
]

[[package]]
name = "ordered-float"
version = "4.3.0"
@@ -8578,6 +8589,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "44d501f1a72f71d3c063a6bbc8f7271fa73aa09fe5d6283b6571e2ed176a2537"
dependencies = [
"num-traits",
"rand 0.8.5",
"serde",
]

[[package]]
@@ -9114,6 +9127,7 @@ dependencies = [
"moka",
"once_cell",
"operator",
"ordered-float 4.3.0",
"paste",
"prometheus",
"query",

@@ -9925,6 +9939,7 @@ dependencies = [
"table",
"tokio",
"tokio-stream",
"tracing",
"unescaper",
"uuid",
]

@@ -11358,8 +11373,10 @@ dependencies = [
"tonic-reflection",
"tower 0.5.2",
"tower-http 0.6.2",
"tracing",
"urlencoding",
"uuid",
"vrl",
"zstd 0.13.2",
]

@@ -13022,9 +13039,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"

[[package]]
name = "tokio"
version = "1.44.2"
version = "1.45.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6b88822cbe49de4185e3a4cbf8321dd487cf5fe0c5c65695fef6346371e9c48"
checksum = "75ef51a33ef1da925cea3e4eb122833cb377c61439ca401b770f54902b806779"
dependencies = [
"backtrace",
"bytes",

@@ -13980,9 +13997,9 @@ dependencies = [

[[package]]
name = "vrl"
version = "0.24.0"
version = "0.25.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f9ceadaa40aef567a26079ff014ca7a567ba85344f1b81090b5ec7d7bb16a219"
checksum = "4f49394b948406ea1564aa00152e011d87a38ad35d277ebddda257a9ee39c419"
dependencies = [
"aes",
"aes-siv",

@@ -167,6 +167,7 @@ opentelemetry-proto = { version = "0.27", features = [
"with-serde",
"logs",
] }
ordered-float = { version = "4.3", features = ["serde"] }
parking_lot = "0.12"
parquet = { version = "54.2", default-features = false, features = ["arrow", "async", "object_store"] }
paste = "1.0"

@@ -223,10 +224,12 @@ tokio-util = { version = "0.7", features = ["io-util", "compat"] }
toml = "0.8.8"
tonic = { version = "0.12", features = ["tls", "gzip", "zstd"] }
tower = "0.5"
tracing = "0.1"
tracing-appender = "0.2"
tracing-subscriber = { version = "0.3", features = ["env-filter", "json", "fmt"] }
typetag = "0.2"
uuid = { version = "1.7", features = ["serde", "v4", "fast-rng"] }
vrl = "0.25"
zstd = "0.13"
# DO_NOT_REMOVE_THIS: END_OF_EXTERNAL_DEPENDENCIES
scripts/install.sh (Executable file → Normal file): 149 changed lines
@@ -53,6 +53,54 @@ get_arch_type() {
esac
}

# Verify SHA256 checksum
verify_sha256() {
file="$1"
expected_sha256="$2"

if command -v sha256sum >/dev/null 2>&1; then
actual_sha256=$(sha256sum "$file" | cut -d' ' -f1)
elif command -v shasum >/dev/null 2>&1; then
actual_sha256=$(shasum -a 256 "$file" | cut -d' ' -f1)
else
echo "Warning: No SHA256 verification tool found (sha256sum or shasum). Skipping checksum verification."
return 0
fi

if [ "$actual_sha256" = "$expected_sha256" ]; then
echo "SHA256 checksum verified successfully."
return 0
else
echo "Error: SHA256 checksum verification failed!"
echo "Expected: $expected_sha256"
echo "Actual: $actual_sha256"
return 1
fi
}

# Prompt for user confirmation (compatible with different shells)
prompt_confirmation() {
message="$1"
printf "%s (y/N): " "$message"

# Try to read user input, fallback if read fails
answer=""
if read answer </dev/tty 2>/dev/null; then
case "$answer" in
[Yy]|[Yy][Ee][Ss])
return 0
;;
*)
return 1
;;
esac
else
echo ""
echo "Cannot read user input. Defaulting to No."
return 1
fi
}

download_artifact() {
if [ -n "${OS_TYPE}" ] && [ -n "${ARCH_TYPE}" ]; then
# Use the latest stable released version.

@@ -71,17 +119,104 @@ download_artifact() {
fi

echo "Downloading ${BIN}, OS: ${OS_TYPE}, Arch: ${ARCH_TYPE}, Version: ${VERSION}"
PACKAGE_NAME="${BIN}-${OS_TYPE}-${ARCH_TYPE}-${VERSION}.tar.gz"
PKG_NAME="${BIN}-${OS_TYPE}-${ARCH_TYPE}-${VERSION}"
PACKAGE_NAME="${PKG_NAME}.tar.gz"
SHA256_FILE="${PKG_NAME}.sha256sum"

if [ -n "${PACKAGE_NAME}" ]; then
wget "https://github.com/${GITHUB_ORG}/${GITHUB_REPO}/releases/download/${VERSION}/${PACKAGE_NAME}"
# Check if files already exist and prompt for override
if [ -f "${PACKAGE_NAME}" ]; then
echo "File ${PACKAGE_NAME} already exists."
if prompt_confirmation "Do you want to override it?"; then
echo "Overriding existing file..."
rm -f "${PACKAGE_NAME}"
else
echo "Skipping download. Using existing file."
fi
fi

if [ -f "${BIN}" ]; then
echo "Binary ${BIN} already exists."
if prompt_confirmation "Do you want to override it?"; then
echo "Will override existing binary..."
rm -f "${BIN}"
else
echo "Installation cancelled."
exit 0
fi
fi

# Download package if not exists
if [ ! -f "${PACKAGE_NAME}" ]; then
echo "Downloading ${PACKAGE_NAME}..."
# Use curl instead of wget for better compatibility
if command -v curl >/dev/null 2>&1; then
if ! curl -L -o "${PACKAGE_NAME}" "https://github.com/${GITHUB_ORG}/${GITHUB_REPO}/releases/download/${VERSION}/${PACKAGE_NAME}"; then
echo "Error: Failed to download ${PACKAGE_NAME}"
exit 1
fi
elif command -v wget >/dev/null 2>&1; then
if ! wget -O "${PACKAGE_NAME}" "https://github.com/${GITHUB_ORG}/${GITHUB_REPO}/releases/download/${VERSION}/${PACKAGE_NAME}"; then
echo "Error: Failed to download ${PACKAGE_NAME}"
exit 1
fi
else
echo "Error: Neither curl nor wget is available for downloading."
exit 1
fi
fi

# Download and verify SHA256 checksum
echo "Downloading SHA256 checksum..."
sha256_download_success=0
if command -v curl >/dev/null 2>&1; then
if curl -L -s -o "${SHA256_FILE}" "https://github.com/${GITHUB_ORG}/${GITHUB_REPO}/releases/download/${VERSION}/${SHA256_FILE}" 2>/dev/null; then
sha256_download_success=1
fi
elif command -v wget >/dev/null 2>&1; then
if wget -q -O "${SHA256_FILE}" "https://github.com/${GITHUB_ORG}/${GITHUB_REPO}/releases/download/${VERSION}/${SHA256_FILE}" 2>/dev/null; then
sha256_download_success=1
fi
fi

if [ $sha256_download_success -eq 1 ] && [ -f "${SHA256_FILE}" ]; then
expected_sha256=$(cat "${SHA256_FILE}" | cut -d' ' -f1)
if [ -n "$expected_sha256" ]; then
if ! verify_sha256 "${PACKAGE_NAME}" "${expected_sha256}"; then
echo "SHA256 verification failed. Removing downloaded file."
rm -f "${PACKAGE_NAME}" "${SHA256_FILE}"
exit 1
fi
else
echo "Warning: Could not parse SHA256 checksum from file."
fi
rm -f "${SHA256_FILE}"
else
echo "Warning: Could not download SHA256 checksum file. Skipping verification."
fi

# Extract the binary and clean the rest.
tar xvf "${PACKAGE_NAME}" && \
mv "${PACKAGE_NAME%.tar.gz}/${BIN}" "${PWD}" && \
rm -r "${PACKAGE_NAME}" && \
rm -r "${PACKAGE_NAME%.tar.gz}" && \
echo "Run './${BIN} --help' to get started"
echo "Extracting ${PACKAGE_NAME}..."
if ! tar xf "${PACKAGE_NAME}"; then
echo "Error: Failed to extract ${PACKAGE_NAME}"
exit 1
fi

# Find the binary in the extracted directory
extracted_dir="${PACKAGE_NAME%.tar.gz}"
if [ -f "${extracted_dir}/${BIN}" ]; then
mv "${extracted_dir}/${BIN}" "${PWD}/"
rm -f "${PACKAGE_NAME}"
rm -rf "${extracted_dir}"
chmod +x "${BIN}"
echo "Installation completed successfully!"
echo "Run './${BIN} --help' to get started"
else
echo "Error: Binary ${BIN} not found in extracted archive"
rm -f "${PACKAGE_NAME}"
rm -rf "${extracted_dir}"
exit 1
fi
fi
fi
}
@@ -169,7 +169,7 @@ impl DfPartitionStream for PGClass {
}

/// Builds the `pg_catalog.pg_class` table row by row
/// TODO(J0HN50N133): `relowner` is always the [`DUMMY_OWNER_ID`] cuz we don't have user.
/// TODO(J0HN50N133): `relowner` is always the [`DUMMY_OWNER_ID`] because we don't have users.
/// Once we have user system, make it the actual owner of the table.
struct PGClassBuilder {
schema: SchemaRef,

@@ -23,7 +23,7 @@ use api::v1::greptime_request::Request;
use api::v1::query_request::Query;
use api::v1::{
AlterTableExpr, AuthHeader, Basic, CreateTableExpr, DdlRequest, GreptimeRequest,
InsertRequests, QueryRequest, RequestHeader,
InsertRequests, QueryRequest, RequestHeader, RowInsertRequests,
};
use arrow_flight::{FlightData, Ticket};
use async_stream::stream;

@@ -118,6 +118,7 @@ impl Database {
}
}

/// Set the catalog for the database client.
pub fn set_catalog(&mut self, catalog: impl Into<String>) {
self.catalog = catalog.into();
}

@@ -130,6 +131,7 @@ impl Database {
}
}

/// Set the schema for the database client.
pub fn set_schema(&mut self, schema: impl Into<String>) {
self.schema = schema.into();
}

@@ -142,20 +144,24 @@ impl Database {
}
}

/// Set the timezone for the database client.
pub fn set_timezone(&mut self, timezone: impl Into<String>) {
self.timezone = timezone.into();
}

/// Set the auth scheme for the database client.
pub fn set_auth(&mut self, auth: AuthScheme) {
self.ctx.auth_header = Some(AuthHeader {
auth_scheme: Some(auth),
});
}

/// Make an InsertRequests request to the database.
pub async fn insert(&self, requests: InsertRequests) -> Result<u32> {
self.handle(Request::Inserts(requests)).await
}

/// Make an InsertRequests request to the database with hints.
pub async fn insert_with_hints(
&self,
requests: InsertRequests,

@@ -172,6 +178,28 @@ impl Database {
from_grpc_response(response)
}

/// Make a RowInsertRequests request to the database.
pub async fn row_inserts(&self, requests: RowInsertRequests) -> Result<u32> {
self.handle(Request::RowInserts(requests)).await
}

/// Make a RowInsertRequests request to the database with hints.
pub async fn row_inserts_with_hints(
&self,
requests: RowInsertRequests,
hints: &[(&str, &str)],
) -> Result<u32> {
let mut client = make_database_client(&self.client)?.inner;
let request = self.to_rpc_request(Request::RowInserts(requests));

let mut request = tonic::Request::new(request);
let metadata = request.metadata_mut();
Self::put_hints(metadata, hints)?;

let response = client.handle(request).await?.into_inner();
from_grpc_response(response)
}

fn put_hints(metadata: &mut MetadataMap, hints: &[(&str, &str)]) -> Result<()> {
let Some(value) = hints
.iter()

@@ -187,6 +215,7 @@ impl Database {
Ok(())
}

/// Make a request to the database.
pub async fn handle(&self, request: Request) -> Result<u32> {
let mut client = make_database_client(&self.client)?.inner;
let request = self.to_rpc_request(request);

@@ -250,6 +279,7 @@ impl Database {
}
}

/// Executes a SQL query without any hints.
pub async fn sql<S>(&self, sql: S) -> Result<Output>
where
S: AsRef<str>,

@@ -257,6 +287,7 @@ impl Database {
self.sql_with_hint(sql, &[]).await
}

/// Executes a SQL query with optional hints for query optimization.
pub async fn sql_with_hint<S>(&self, sql: S, hints: &[(&str, &str)]) -> Result<Output>
where
S: AsRef<str>,

@@ -267,6 +298,7 @@ impl Database {
self.do_get(request, hints).await
}

/// Executes a logical plan directly without SQL parsing.
pub async fn logical_plan(&self, logical_plan: Vec<u8>) -> Result<Output> {
let request = Request::Query(QueryRequest {
query: Some(Query::LogicalPlan(logical_plan)),

@@ -274,6 +306,7 @@ impl Database {
self.do_get(request, &[]).await
}

/// Creates a new table using the provided table expression.
pub async fn create(&self, expr: CreateTableExpr) -> Result<Output> {
let request = Request::Ddl(DdlRequest {
expr: Some(DdlExpr::CreateTable(expr)),

@@ -281,6 +314,7 @@ impl Database {
self.do_get(request, &[]).await
}

/// Alters an existing table using the provided alter expression.
pub async fn alter(&self, expr: AlterTableExpr) -> Result<Output> {
let request = Request::Ddl(DdlRequest {
expr: Some(DdlExpr::AlterTable(expr)),
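For context on the client surface added above, here is a minimal, hedged usage sketch of the new row-based insert API on `Database`. It assumes an `api::v1::RowInsertRequests` has already been assembled elsewhere and that it runs inside the client crate where `Database` and its `Result` type are in scope; the hint key/value shown is purely illustrative, not a documented hint.

```rust
use api::v1::RowInsertRequests;

// Sketch only: mirrors the signatures introduced in the diff.
async fn ingest_rows(db: &Database, rows: RowInsertRequests) -> Result<u32> {
    // Plain row-based insert, no hints.
    let affected = db.row_inserts(rows.clone()).await?;

    // Same request, but attaching per-request hints (hypothetical hint shown).
    let _ = db
        .row_inserts_with_hints(rows, &[("example_hint", "true")])
        .await?;

    Ok(affected)
}
```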
@@ -69,6 +69,7 @@ table = { workspace = true, features = ["testing"] }
|
||||
tokio.workspace = true
|
||||
tokio-postgres = { workspace = true, optional = true }
|
||||
tonic.workspace = true
|
||||
tracing.workspace = true
|
||||
typetag.workspace = true
|
||||
|
||||
[dev-dependencies]
|
||||
|
||||
@@ -20,8 +20,8 @@ use api::v1::region::{alter_request, AlterRequest, RegionRequest, RegionRequestH
|
||||
use api::v1::AlterTableExpr;
|
||||
use common_catalog::format_full_table_name;
|
||||
use common_grpc_expr::alter_expr_to_request;
|
||||
use common_telemetry::debug;
|
||||
use common_telemetry::tracing_context::TracingContext;
|
||||
use common_telemetry::{debug, info};
|
||||
use futures::future;
|
||||
use snafu::{ensure, ResultExt};
|
||||
use store_api::metadata::ColumnMetadata;
|
||||
@@ -304,5 +304,10 @@ fn build_new_table_info(
|
||||
| AlterKind::DropDefaults { .. } => {}
|
||||
}
|
||||
|
||||
info!(
|
||||
"Built new table info: {:?} for table {}, table_id: {}",
|
||||
new_info.meta, table_name, table_id
|
||||
);
|
||||
|
||||
Ok(new_info)
|
||||
}
|
||||
|
||||
@@ -21,7 +21,7 @@ use crate::key::table_name::TableNameKey;
|
||||
impl CreateFlowProcedure {
|
||||
/// Allocates the [FlowId].
|
||||
pub(crate) async fn allocate_flow_id(&mut self) -> Result<()> {
|
||||
//TODO(weny, ruihang): We doesn't support the partitions. It's always be 1, now.
|
||||
// TODO(weny, ruihang): We don't support the partitions. It's always be 1, now.
|
||||
let partitions = 1;
|
||||
let (flow_id, peers) = self
|
||||
.context
|
||||
|
||||
@@ -113,15 +113,19 @@ impl TableMetadataAllocator {
|
||||
table_id: TableId,
|
||||
task: &CreateTableTask,
|
||||
) -> Result<PhysicalTableRouteValue> {
|
||||
let regions = task.partitions.len();
|
||||
let num_regions = task
|
||||
.partitions
|
||||
.as_ref()
|
||||
.map(|p| p.value_list.len())
|
||||
.unwrap_or(1);
|
||||
ensure!(
|
||||
regions > 0,
|
||||
num_regions > 0,
|
||||
error::UnexpectedSnafu {
|
||||
err_msg: "The number of partitions must be greater than 0"
|
||||
}
|
||||
);
|
||||
|
||||
let peers = self.peer_allocator.alloc(regions).await?;
|
||||
let peers = self.peer_allocator.alloc(num_regions).await?;
|
||||
debug!("Allocated peers {:?} for table {}", peers, table_id);
|
||||
let region_routes = task
|
||||
.partitions
|
||||
|
||||
@@ -21,7 +21,6 @@ pub mod flownode_handler;
|
||||
use std::assert_matches::assert_matches;
|
||||
use std::collections::HashMap;
|
||||
|
||||
use api::v1::meta::Partition;
|
||||
use api::v1::{ColumnDataType, SemanticType};
|
||||
use common_procedure::Status;
|
||||
use datatypes::prelude::ConcreteDataType;
|
||||
@@ -145,10 +144,7 @@ pub fn test_create_logical_table_task(name: &str) -> CreateTableTask {
|
||||
CreateTableTask {
|
||||
create_table,
|
||||
// Single region
|
||||
partitions: vec![Partition {
|
||||
column_list: vec![],
|
||||
value_list: vec![],
|
||||
}],
|
||||
partitions: None,
|
||||
table_info,
|
||||
}
|
||||
}
|
||||
@@ -183,10 +179,7 @@ pub fn test_create_physical_table_task(name: &str) -> CreateTableTask {
|
||||
CreateTableTask {
|
||||
create_table,
|
||||
// Single region
|
||||
partitions: vec![Partition {
|
||||
column_list: vec![],
|
||||
value_list: vec![],
|
||||
}],
|
||||
partitions: None,
|
||||
table_info,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -15,7 +15,6 @@
|
||||
use std::collections::HashMap;
|
||||
|
||||
use api::v1::column_def::try_as_column_schema;
|
||||
use api::v1::meta::Partition;
|
||||
use api::v1::{ColumnDataType, ColumnDef, CreateTableExpr, SemanticType};
|
||||
use chrono::DateTime;
|
||||
use common_catalog::consts::{
|
||||
@@ -175,10 +174,7 @@ pub fn test_create_table_task(name: &str, table_id: TableId) -> CreateTableTask
|
||||
CreateTableTask {
|
||||
create_table,
|
||||
// Single region
|
||||
partitions: vec![Partition {
|
||||
column_list: vec![],
|
||||
value_list: vec![],
|
||||
}],
|
||||
partitions: None,
|
||||
table_info,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -17,7 +17,7 @@ use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
|
||||
use api::region::RegionResponse;
|
||||
use api::v1::meta::{Partition, Peer};
|
||||
use api::v1::meta::Peer;
|
||||
use api::v1::region::{region_request, RegionRequest};
|
||||
use api::v1::{ColumnDataType, SemanticType};
|
||||
use common_error::ext::ErrorExt;
|
||||
@@ -141,10 +141,7 @@ pub(crate) fn test_create_table_task(name: &str) -> CreateTableTask {
|
||||
CreateTableTask {
|
||||
create_table,
|
||||
// Single region
|
||||
partitions: vec![Partition {
|
||||
column_list: vec![],
|
||||
value_list: vec![],
|
||||
}],
|
||||
partitions: None,
|
||||
table_info,
|
||||
}
|
||||
}
|
||||
@@ -218,7 +215,7 @@ async fn test_on_prepare_with_no_partition_err() {
|
||||
let node_manager = Arc::new(MockDatanodeManager::new(()));
|
||||
let ddl_context = new_ddl_context(node_manager);
|
||||
let mut task = test_create_table_task("foo");
|
||||
task.partitions = vec![];
|
||||
task.partitions = None;
|
||||
task.create_table.create_if_not_exists = true;
|
||||
let mut procedure = CreateTableProcedure::new(task, ddl_context);
|
||||
let err = procedure.on_prepare().await.unwrap_err();
|
||||
|
||||
@@ -19,11 +19,17 @@ pub use api::v1::meta::Peer;
|
||||
use crate::error::Error;
|
||||
use crate::{DatanodeId, FlownodeId};
|
||||
|
||||
/// can query peer given a node id
|
||||
/// PeerLookupService is a service that can lookup peers.
|
||||
#[async_trait::async_trait]
|
||||
pub trait PeerLookupService {
|
||||
/// Returns the datanode with the given id. It may return inactive peers.
|
||||
async fn datanode(&self, id: DatanodeId) -> Result<Option<Peer>, Error>;
|
||||
|
||||
/// Returns the flownode with the given id. It may return inactive peers.
|
||||
async fn flownode(&self, id: FlownodeId) -> Result<Option<Peer>, Error>;
|
||||
|
||||
/// Returns all currently active frontend nodes that have reported a heartbeat within the most recent heartbeat interval from the in-memory backend.
|
||||
async fn active_frontends(&self) -> Result<Vec<Peer>, Error>;
|
||||
}
|
||||
|
||||
pub type PeerLookupServiceRef = Arc<dyn PeerLookupService + Send + Sync>;
|
||||
@@ -96,7 +96,7 @@ impl DdlTask {
/// Creates a [`DdlTask`] to create a table.
pub fn new_create_table(
expr: CreateTableExpr,
partitions: Vec<Partition>,
partitions: Option<Partition>,
table_info: RawTableInfo,
) -> Self {
DdlTask::CreateTable(CreateTableTask::new(expr, partitions, table_info))

@@ -107,7 +107,7 @@ impl DdlTask {
DdlTask::CreateLogicalTables(
table_data
.into_iter()
.map(|(expr, table_info)| CreateTableTask::new(expr, Vec::new(), table_info))
.map(|(expr, table_info)| CreateTableTask::new(expr, None, table_info))
.collect(),
)
}

@@ -606,7 +606,10 @@ impl From<DropTableTask> for PbDropTableTask {
#[derive(Debug, PartialEq, Clone)]
pub struct CreateTableTask {
pub create_table: CreateTableExpr,
pub partitions: Vec<Partition>,
/// The partitions of the table.
///
/// If the table is created with a single region (not partitioned), this field is `None`.
pub partitions: Option<Partition>,
pub table_info: RawTableInfo,
}

@@ -620,7 +623,7 @@ impl TryFrom<PbCreateTableTask> for CreateTableTask {
pb.create_table.context(error::InvalidProtoMsgSnafu {
err_msg: "expected create table",
})?,
pb.partitions,
pb.partitions.first().cloned(),
table_info,
))
}

@@ -633,7 +636,10 @@ impl TryFrom<CreateTableTask> for PbCreateTableTask {
Ok(PbCreateTableTask {
table_info: serde_json::to_vec(&task.table_info).context(error::SerdeJsonSnafu)?,
create_table: Some(task.create_table),
partitions: task.partitions,
partitions: match task.partitions {
Some(p) => vec![p],
None => vec![],
},
})
}
}

@@ -641,7 +647,7 @@ impl TryFrom<CreateTableTask> for PbCreateTableTask {
impl CreateTableTask {
pub fn new(
expr: CreateTableExpr,
partitions: Vec<Partition>,
partitions: Option<Partition>,
table_info: RawTableInfo,
) -> CreateTableTask {
CreateTableTask {

@@ -701,7 +707,10 @@ impl Serialize for CreateTableTask {

let pb = PbCreateTableTask {
create_table: Some(self.create_table.clone()),
partitions: self.partitions.clone(),
partitions: match &self.partitions {
Some(p) => vec![p.clone()],
None => vec![],
},
table_info,
};
let buf = pb.encode_to_vec();

@@ -1315,7 +1324,7 @@ mod tests {
let table_info = test_table_info(1025, "foo", "bar", "baz", Arc::new(schema));
let task = CreateTableTask::new(
CreateTableExpr::default(),
Vec::new(),
None,
RawTableInfo::from(table_info),
);

@@ -1411,8 +1420,7 @@ mod tests {
..Default::default()
};

let mut create_table_task =
CreateTableTask::new(create_table_expr, Vec::new(), raw_table_info);
let mut create_table_task = CreateTableTask::new(create_table_expr, None, raw_table_info);

// Call the sort_columns method
create_table_task.sort_columns();
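The Vec-to-Option migration above keeps the protobuf wire format unchanged (the proto field stays a repeated `Partition`). A small hedged sketch of the mapping the conversions implement, with free functions standing in for the `TryFrom`/`Serialize` impls in the diff:

```rust
// Sketch: Option<Partition> on the Rust side, Vec<Partition> on the protobuf side.
fn option_to_pb(partitions: Option<Partition>) -> Vec<Partition> {
    // None => empty vec (single, unpartitioned region); Some(p) => vec![p].
    partitions.into_iter().collect()
}

fn pb_to_option(partitions: Vec<Partition>) -> Option<Partition> {
    // Only the first entry is meaningful after this change; extras are ignored.
    partitions.into_iter().next()
}
```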
@@ -391,6 +391,9 @@ impl From<Region> for PbRegion {
}
}

/// Serialized version of `PartitionDef`.
///
/// Represent the entire partition part of one table
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
pub struct Partition {
#[serde(serialize_with = "as_utf8_vec", deserialize_with = "from_utf8_vec")]

@@ -213,6 +213,10 @@ impl PeerLookupService for NoopPeerLookupService {
async fn flownode(&self, id: FlownodeId) -> Result<Option<Peer>> {
Ok(Some(Peer::empty(id)))
}

async fn active_frontends(&self) -> Result<Vec<Peer>> {
Ok(vec![])
}
}

/// Create a kafka topic pool for testing.

@@ -56,8 +56,18 @@ macro_rules! parse_number_to_value {
},
)+
ConcreteDataType::Timestamp(t) => {
let n = parse_sql_number::<i64>($n)?;
Ok(Value::Timestamp(Timestamp::new(n, t.unit())))
let n = parse_sql_number::<i64>($n)?;
let timestamp = Timestamp::new(n, t.unit());

// Check if the value is within the valid range for the target unit
if Timestamp::is_overflow(n, t.unit()) {
return TimestampOverflowSnafu {
timestamp,
target_unit: t.unit(),
}.fail();
}

Ok(Value::Timestamp(timestamp))
},
// TODO(QuenKar): This could need to be optimized
// if this from_str function is slow,

@@ -362,6 +372,7 @@ pub(crate) fn parse_hex_string(s: &str) -> Result<Value> {
mod test {
use common_base::bytes::Bytes;
use common_time::timestamp::TimeUnit;
use datatypes::types::TimestampType;
use datatypes::value::OrderedFloat;

use super::*;

@@ -1081,4 +1092,89 @@ mod test {
);
assert!(v.is_ok());
}

#[test]
fn test_sql_number_to_value_timestamp_strict_typing() {
// Test that values are interpreted according to the target column type
let timestamp_type = TimestampType::Millisecond(datatypes::types::TimestampMillisecondType);
let data_type = ConcreteDataType::Timestamp(timestamp_type);

// Valid millisecond timestamp
let millisecond_str = "1747814093865";
let result = sql_number_to_value(&data_type, millisecond_str).unwrap();
if let Value::Timestamp(ts) = result {
assert_eq!(ts.unit(), TimeUnit::Millisecond);
assert_eq!(ts.value(), 1747814093865);
} else {
panic!("Expected timestamp value");
}

// Large value that would overflow when treated as milliseconds should be rejected
let nanosecond_str = "1747814093865000000"; // This is too large for millisecond precision
let result = sql_number_to_value(&data_type, nanosecond_str);
assert!(
result.is_err(),
"Should reject overly large timestamp values"
);
}

#[test]
fn test_sql_number_to_value_timestamp_different_units() {
// Test second precision
let second_type = TimestampType::Second(datatypes::types::TimestampSecondType);
let second_data_type = ConcreteDataType::Timestamp(second_type);

let second_str = "1747814093";
let result = sql_number_to_value(&second_data_type, second_str).unwrap();
if let Value::Timestamp(ts) = result {
assert_eq!(ts.unit(), TimeUnit::Second);
assert_eq!(ts.value(), 1747814093);
} else {
panic!("Expected timestamp value");
}

// Test nanosecond precision
let nanosecond_type = TimestampType::Nanosecond(datatypes::types::TimestampNanosecondType);
let nanosecond_data_type = ConcreteDataType::Timestamp(nanosecond_type);

let nanosecond_str = "1747814093865000000";
let result = sql_number_to_value(&nanosecond_data_type, nanosecond_str).unwrap();
if let Value::Timestamp(ts) = result {
assert_eq!(ts.unit(), TimeUnit::Nanosecond);
assert_eq!(ts.value(), 1747814093865000000);
} else {
panic!("Expected timestamp value");
}
}

#[test]
fn test_timestamp_range_validation() {
// Test that our range checking works correctly
let nanosecond_value = 1747814093865000000i64; // This should be too large for millisecond

// This should work for nanosecond precision
let nanosecond_type = TimestampType::Nanosecond(datatypes::types::TimestampNanosecondType);
let nanosecond_data_type = ConcreteDataType::Timestamp(nanosecond_type);
let result = sql_number_to_value(&nanosecond_data_type, "1747814093865000000");
assert!(
result.is_ok(),
"Nanosecond value should be valid for nanosecond column"
);

// This should fail for millisecond precision (value too large)
let millisecond_type =
TimestampType::Millisecond(datatypes::types::TimestampMillisecondType);
let millisecond_data_type = ConcreteDataType::Timestamp(millisecond_type);
let result = sql_number_to_value(&millisecond_data_type, "1747814093865000000");
assert!(
result.is_err(),
"Nanosecond value should be rejected for millisecond column"
);

// Verify the ranges work as expected
assert!(
nanosecond_value > Timestamp::MAX_MILLISECOND.value(),
"Test value should exceed millisecond range"
);
}
}

@@ -498,6 +498,17 @@ impl Timestamp {

pub const MIN_NANOSECOND: Self = Self::new_nanosecond(i64::MIN);
pub const MAX_NANOSECOND: Self = Self::new_nanosecond(i64::MAX);

/// Checks if a value would overflow for the given time unit.
pub fn is_overflow(value: i64, unit: TimeUnit) -> bool {
let (min_val, max_val) = match unit {
TimeUnit::Second => (Self::MIN_SECOND.value(), Self::MAX_SECOND.value()),
TimeUnit::Millisecond => (Self::MIN_MILLISECOND.value(), Self::MAX_MILLISECOND.value()),
TimeUnit::Microsecond => (Self::MIN_MICROSECOND.value(), Self::MAX_MICROSECOND.value()),
TimeUnit::Nanosecond => (Self::MIN_NANOSECOND.value(), Self::MAX_NANOSECOND.value()),
};
value < min_val || value > max_val
}
}

/// Converts the naive datetime (which has no specific timezone) to a
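A short illustration of what the new range check buys. This is a sketch, and it assumes the `MIN_*`/`MAX_*` constants are defined so that a timestamp in a coarser unit must still be convertible to nanoseconds without overflowing `i64` (which is why a nanosecond-scale integer is out of range for a millisecond column, as the tests above also assert):

```rust
use common_time::timestamp::{TimeUnit, Timestamp};

fn main() {
    // An epoch value at nanosecond precision (~2025-05-21).
    let n: i64 = 1_747_814_093_865_000_000;

    // Interpreted as milliseconds it exceeds the representable range, so SQL
    // number parsing now fails instead of silently storing a far-future value.
    assert!(Timestamp::is_overflow(n, TimeUnit::Millisecond));

    // Interpreted as nanoseconds it is a perfectly valid timestamp.
    assert!(!Timestamp::is_overflow(n, TimeUnit::Nanosecond));
}
```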
@@ -66,6 +66,7 @@ table.workspace = true
tokio.workspace = true
toml.workspace = true
tonic.workspace = true
tracing.workspace = true

[dev-dependencies]
cache.workspace = true

@@ -424,7 +424,15 @@ impl CountdownTask {
},
Some(CountdownCommand::Reset((role, deadline, extension_info))) => {
if let Err(err) = self.region_server.set_region_role(self.region_id, role) {
error!(err; "Failed to set region role to {role} for region {region_id}");
if err.status_code() == StatusCode::RegionNotFound {
// Table metadata in metasrv is deleted after its regions are dropped.
// The datanode may still receive lease renewal responses that depend on the metadata
// during the short period before it is removed.
warn!(err; "Failed to set region role to {role} for region {region_id}");
}else{
error!(err; "Failed to set region role to {role} for region {region_id}");
}

}
if let Some(ext_handler) = self.handler_ext.as_ref() {
ext_handler.reset_deadline(

@@ -27,14 +27,14 @@ lazy_static! {
pub static ref HANDLE_REGION_REQUEST_ELAPSED: HistogramVec = register_histogram_vec!(
"greptime_datanode_handle_region_request_elapsed",
"datanode handle region request elapsed",
&[REGION_ID, REGION_REQUEST_TYPE]
&[REGION_REQUEST_TYPE]
)
.unwrap();
/// The number of rows in region request received by region server, labeled with request type.
pub static ref REGION_CHANGED_ROW_COUNT: IntCounterVec = register_int_counter_vec!(
"greptime_datanode_region_changed_row_count",
"datanode region changed row count",
&[REGION_ID, REGION_REQUEST_TYPE]
&[REGION_REQUEST_TYPE]
)
.unwrap();
/// The elapsed time since the last received heartbeat.

@@ -968,9 +968,8 @@ impl RegionServerInner {
request: RegionRequest,
) -> Result<RegionResponse> {
let request_type = request.request_type();
let region_id_str = region_id.to_string();
let _timer = crate::metrics::HANDLE_REGION_REQUEST_ELAPSED
.with_label_values(&[&region_id_str, request_type])
.with_label_values(&[request_type])
.start_timer();

let region_change = match &request {

@@ -1010,7 +1009,7 @@ impl RegionServerInner {
// Update metrics
if matches!(region_change, RegionChange::Ingest) {
crate::metrics::REGION_CHANGED_ROW_COUNT
.with_label_values(&[&region_id_str, request_type])
.with_label_values(&[request_type])
.inc_by(result.affected_rows as u64);
}
// Sets corresponding region status to ready.
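The label change above is a cardinality fix: labelling by region id creates one time series per region per request type, while labelling only by request type keeps the series count bounded no matter how many regions a datanode hosts. A hedged sketch of the call-site pattern with the prometheus crate:

```rust
use prometheus::HistogramVec;

// One histogram series per request type; the elapsed time is recorded when the
// timer guard is dropped at the end of the function.
fn time_region_request(metric: &HistogramVec, request_type: &str) {
    let _timer = metric.with_label_values(&[request_type]).start_timer();
    // ... handle the region request here ...
}
```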
@@ -28,7 +28,7 @@ greptime-proto.workspace = true
jsonb.workspace = true
num = "0.4"
num-traits = "0.2"
ordered-float = { version = "3.0", features = ["serde"] }
ordered-float.workspace = true
paste.workspace = true
serde.workspace = true
serde_json.workspace = true

@@ -497,7 +497,7 @@ impl StreamingEngine {
&self,
schema: &RelationDesc,
) -> Result<(Vec<String>, Vec<ColumnSchema>, bool), Error> {
// TODO(discord9): condiser remove buggy auto create by schema
// TODO(discord9): consider remove buggy auto create by schema

// TODO(discord9): use default key from schema
let primary_keys = schema

@@ -14,7 +14,7 @@

//! Batching mode engine

use std::collections::{BTreeMap, HashMap};
use std::collections::{BTreeMap, HashMap, HashSet};
use std::sync::Arc;

use api::v1::flow::{DirtyWindowRequests, FlowResponse};

@@ -142,7 +142,7 @@ impl BatchingEngine {

let handle: JoinHandle<Result<(), Error>> = tokio::spawn(async move {
let src_table_names = &task.config.source_table_names;
let mut all_dirty_windows = vec![];
let mut all_dirty_windows = HashSet::new();
for src_table_name in src_table_names {
if let Some((timestamps, unit)) = group_by_table_name.get(src_table_name) {
let Some(expr) = &task.config.time_window_expr else {

@@ -155,7 +155,7 @@ impl BatchingEngine {
.context(UnexpectedSnafu {
reason: "Failed to eval start value",
})?;
all_dirty_windows.push(align_start);
all_dirty_windows.insert(align_start);
}
}
}

@@ -50,7 +50,8 @@ use snafu::{ensure, OptionExt, ResultExt};

use crate::adapter::util::from_proto_to_data_type;
use crate::error::{
ArrowSnafu, DatafusionSnafu, DatatypesSnafu, ExternalSnafu, PlanSnafu, UnexpectedSnafu,
ArrowSnafu, DatafusionSnafu, DatatypesSnafu, ExternalSnafu, PlanSnafu, TimeSnafu,
UnexpectedSnafu,
};
use crate::expr::error::DataTypeSnafu;
use crate::Error;

@@ -74,6 +75,7 @@ pub struct TimeWindowExpr {
logical_expr: Expr,
df_schema: DFSchema,
eval_time_window_size: Option<std::time::Duration>,
eval_time_original: Option<Timestamp>,
}

impl std::fmt::Display for TimeWindowExpr {

@@ -106,10 +108,11 @@ impl TimeWindowExpr {
logical_expr: expr.clone(),
df_schema: df_schema.clone(),
eval_time_window_size: None,
eval_time_original: None,
};
let test_ts = DEFAULT_TEST_TIMESTAMP;
let (l, u) = zelf.eval(test_ts)?;
let time_window_size = match (l, u) {
let (lower, upper) = zelf.eval(test_ts)?;
let time_window_size = match (lower, upper) {
(Some(l), Some(u)) => u.sub(&l).map(|r| r.to_std()).transpose().map_err(|_| {
UnexpectedSnafu {
reason: format!(

@@ -121,13 +124,59 @@ impl TimeWindowExpr {
_ => None,
};
zelf.eval_time_window_size = time_window_size;
zelf.eval_time_original = lower;

Ok(zelf)
}

/// TODO(discord9): add `eval_batch` too
pub fn eval(
&self,
current: Timestamp,
) -> Result<(Option<Timestamp>, Option<Timestamp>), Error> {
fn compute_distance(time_diff_ns: i64, stride_ns: i64) -> i64 {
if stride_ns == 0 {
return time_diff_ns;
}
// a - (a % n) impl ceil to nearest n * stride
let time_delta = time_diff_ns - (time_diff_ns % stride_ns);

if time_diff_ns < 0 && time_delta != time_diff_ns {
// The origin is later than the source timestamp, round down to the previous bin

time_delta - stride_ns
} else {
time_delta
}
}

// FAST PATH: if we have eval_time_original and eval_time_window_size,
// we can compute the bounds directly
if let (Some(original), Some(window_size)) =
(self.eval_time_original, self.eval_time_window_size)
{
// date_bin align current to lower bound
let time_diff_ns = current.sub(&original).and_then(|s|s.num_nanoseconds()).with_context(||UnexpectedSnafu {
reason: format!(
"Failed to compute time difference between current {current:?} and original {original:?}"
),
})?;

let window_size_ns = window_size.as_nanos() as i64;

let distance_ns = compute_distance(time_diff_ns, window_size_ns);

let lower_bound = if distance_ns >= 0 {
original.add_duration(std::time::Duration::from_nanos(distance_ns as u64))
} else {
original.sub_duration(std::time::Duration::from_nanos((-distance_ns) as u64))
}
.context(TimeSnafu)?;
let upper_bound = lower_bound.add_duration(window_size).context(TimeSnafu)?;

return Ok((Some(lower_bound), Some(upper_bound)));
}

let lower_bound =
calc_expr_time_window_lower_bound(&self.phy_expr, &self.df_schema, current)?;
let upper_bound =
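To make the fast path above concrete, here is a standalone copy of `compute_distance` with a worked example: with a 5-minute window anchored at the origin, a timestamp 12 minutes 34 seconds after the origin aligns to the bin starting at +10 minutes, and a timestamp just before the origin rounds down to the previous bin.

```rust
fn compute_distance(time_diff_ns: i64, stride_ns: i64) -> i64 {
    if stride_ns == 0 {
        return time_diff_ns;
    }
    // Truncate towards zero to the nearest multiple of the stride.
    let time_delta = time_diff_ns - (time_diff_ns % stride_ns);
    if time_diff_ns < 0 && time_delta != time_diff_ns {
        // Current time is before the origin: step back one full window.
        time_delta - stride_ns
    } else {
        time_delta
    }
}

fn main() {
    let stride_ns = 300 * 1_000_000_000i64; // 5 minute window

    // 12 min 34 s (754 s) after the origin -> bin starting at +10 min (600 s).
    assert_eq!(compute_distance(754 * 1_000_000_000, stride_ns), 600 * 1_000_000_000);

    // 10 s before the origin -> bin starting at -5 min (-300 s).
    assert_eq!(compute_distance(-10 * 1_000_000_000, stride_ns), -300 * 1_000_000_000);
}
```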
@@ -74,6 +74,7 @@ tokio.workspace = true
tokio-util.workspace = true
toml.workspace = true
tonic.workspace = true
tracing.workspace = true

[dev-dependencies]
catalog = { workspace = true, features = ["testing"] }

@@ -20,8 +20,11 @@ use std::task::{Context, Poll};

use api::v1::meta::heartbeat_request::NodeWorkloads;
use common_error::ext::BoxedError;
use common_meta::cluster::{NodeInfo, NodeInfoKey, Role as ClusterRole};
use common_meta::distributed_time_constants::FRONTEND_HEARTBEAT_INTERVAL_MILLIS;
use common_meta::kv_backend::{KvBackend, ResettableKvBackendRef};
use common_meta::peer::{Peer, PeerLookupService};
use common_meta::rpc::store::RangeRequest;
use common_meta::{util, DatanodeId, FlownodeId};
use common_time::util as time_util;
use common_workload::DatanodeWorkloadType;

@@ -31,10 +34,19 @@ use crate::cluster::MetaPeerClientRef;
use crate::error::{Error, KvBackendSnafu, Result};
use crate::key::{DatanodeLeaseKey, FlownodeLeaseKey, LeaseValue};

fn build_lease_filter(lease_secs: u64) -> impl Fn(&LeaseValue) -> bool {
move |v: &LeaseValue| {
((time_util::current_time_millis() - v.timestamp_millis) as u64)
< lease_secs.saturating_mul(1000)
enum Value<'a> {
LeaseValue(&'a LeaseValue),
NodeInfo(&'a NodeInfo),
}

fn build_lease_filter(lease_secs: u64) -> impl Fn(Value) -> bool {
move |value: Value| {
let active_time = match value {
Value::LeaseValue(lease_value) => lease_value.timestamp_millis,
Value::NodeInfo(node_info) => node_info.last_activity_ts,
};

((time_util::current_time_millis() - active_time) as u64) < lease_secs.saturating_mul(1000)
}
}

@@ -91,7 +103,7 @@ pub async fn lookup_datanode_peer(
return Ok(None);
};
let lease_value: LeaseValue = kv.value.try_into()?;
let is_alive = lease_filter(&lease_value);
let is_alive = lease_filter(Value::LeaseValue(&lease_value));
if is_alive {
Ok(Some(Peer {
id: lease_key.node_id,

@@ -155,7 +167,7 @@ where
let condition = this.condition;
let key_prefix = std::mem::take(&mut this.key_prefix);
let fut = filter(key_prefix, this.meta_peer_client, move |v| {
lease_filter(v) && condition.unwrap_or(|_| true)(v)
lease_filter(Value::LeaseValue(v)) && condition.unwrap_or(|_| true)(v)
});

this.inner_future = Some(Box::pin(fut));

@@ -192,7 +204,7 @@ pub async fn lookup_flownode_peer(
};
let lease_value: LeaseValue = kv.value.try_into()?;

let is_alive = lease_filter(&lease_value);
let is_alive = lease_filter(Value::LeaseValue(&lease_value));
if is_alive {
Ok(Some(Peer {
id: lease_key.node_id,

@@ -203,6 +215,29 @@ pub async fn lookup_flownode_peer(
}
}

/// Lookup all alive frontends from the memory backend, only return if it's alive under given `lease_secs`.
pub async fn lookup_frontends(
meta_peer_client: &MetaPeerClientRef,
lease_secs: u64,
) -> Result<Vec<Peer>> {
let range_request =
RangeRequest::new().with_prefix(NodeInfoKey::key_prefix_with_role(ClusterRole::Frontend));

let response = meta_peer_client.range(range_request).await?;
let lease_filter = build_lease_filter(lease_secs);

let mut peers = Vec::with_capacity(response.kvs.len());
for kv in response.kvs {
let node_info = NodeInfo::try_from(kv.value).context(KvBackendSnafu)?;
let is_alive = lease_filter(Value::NodeInfo(&node_info));
if is_alive {
peers.push(node_info.peer);
}
}

Ok(peers)
}

/// Find all alive flownodes
pub fn alive_flownodes(
meta_peer_client: &MetaPeerClientRef,

@@ -264,25 +299,42 @@ impl PeerLookupService for MetaPeerLookupService {
.map_err(BoxedError::new)
.context(common_meta::error::ExternalSnafu)
}

async fn flownode(&self, id: FlownodeId) -> common_meta::error::Result<Option<Peer>> {
lookup_flownode_peer(id, &self.meta_peer_client, u64::MAX)
.await
.map_err(BoxedError::new)
.context(common_meta::error::ExternalSnafu)
}

async fn active_frontends(&self) -> common_meta::error::Result<Vec<Peer>> {
// Get the active frontends within the last heartbeat interval.
lookup_frontends(
&self.meta_peer_client,
// TODO(zyy17): How to get the heartbeat interval of the frontend if it uses a custom heartbeat interval?
FRONTEND_HEARTBEAT_INTERVAL_MILLIS,
)
.await
.map_err(BoxedError::new)
.context(common_meta::error::ExternalSnafu)
}
}

#[cfg(test)]
mod tests {
use api::v1::meta::heartbeat_request::NodeWorkloads;
use api::v1::meta::DatanodeWorkloads;
use common_meta::cluster::{FrontendStatus, NodeInfo, NodeInfoKey, NodeStatus};
use common_meta::kv_backend::ResettableKvBackendRef;
use common_meta::peer::Peer;
use common_meta::rpc::store::PutRequest;
use common_time::util::current_time_millis;
use common_workload::DatanodeWorkloadType;

use crate::key::{DatanodeLeaseKey, LeaseValue};
use crate::lease::{alive_datanodes, is_datanode_accept_ingest_workload};
use crate::lease::{
alive_datanodes, is_datanode_accept_ingest_workload, lookup_frontends, ClusterRole,
};
use crate::test_util::create_meta_peer_client;

async fn put_lease_value(

@@ -391,4 +443,60 @@ mod tests {
assert_eq!(leases.len(), 1);
assert!(leases.contains_key(&DatanodeLeaseKey { node_id: 2 }));
}

#[tokio::test]
async fn test_lookup_frontends() {
let client = create_meta_peer_client();
let in_memory = client.memory_backend();
let lease_secs = 10;

let active_frontend_node = NodeInfo {
peer: Peer {
id: 0,
addr: "127.0.0.1:20201".to_string(),
},
last_activity_ts: current_time_millis(),
status: NodeStatus::Frontend(FrontendStatus {}),
version: "1.0.0".to_string(),
git_commit: "1234567890".to_string(),
start_time_ms: current_time_millis() as u64,
};

let key_prefix = NodeInfoKey::key_prefix_with_role(ClusterRole::Frontend);

in_memory
.put(PutRequest {
key: format!("{}{}", key_prefix, "0").into(),
value: active_frontend_node.try_into().unwrap(),
prev_kv: false,
})
.await
.unwrap();

let inactive_frontend_node = NodeInfo {
peer: Peer {
id: 1,
addr: "127.0.0.1:20201".to_string(),
},
last_activity_ts: current_time_millis() - 20 * 1000,
status: NodeStatus::Frontend(FrontendStatus {}),
version: "1.0.0".to_string(),
git_commit: "1234567890".to_string(),
start_time_ms: current_time_millis() as u64,
};

in_memory
.put(PutRequest {
key: format!("{}{}", key_prefix, "1").into(),
value: inactive_frontend_node.try_into().unwrap(),
prev_kv: false,
})
.await
.unwrap();

let peers = lookup_frontends(&client, lease_secs as u64).await.unwrap();

assert_eq!(peers.len(), 1);
assert_eq!(peers[0].id, 0);
}
}
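Both branches of the new `Value` enum feed the same liveness rule. A reduced standalone sketch of that rule, with plain integers standing in for `LeaseValue.timestamp_millis` and `NodeInfo.last_activity_ts`:

```rust
// A node counts as alive if its last activity falls within the lease window.
fn is_alive(last_activity_ms: i64, now_ms: i64, lease_secs: u64) -> bool {
    ((now_ms - last_activity_ms) as u64) < lease_secs.saturating_mul(1000)
}

fn main() {
    let now = 1_000_000;
    assert!(is_alive(now - 9_000, now, 10));   // heartbeat 9 s ago, 10 s lease: active
    assert!(!is_alive(now - 20_000, now, 10)); // heartbeat 20 s ago: considered inactive
}
```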
@@ -15,7 +15,6 @@
use std::collections::{HashMap, HashSet};
use std::sync::{Arc, Mutex};

use api::v1::meta::Partition;
use api::v1::region::region_request::Body as PbRegionRequest;
use api::v1::region::{CreateRequest as PbCreateRegionRequest, RegionColumnDef};
use api::v1::{ColumnDataType, ColumnDef as PbColumnDef, SemanticType};

@@ -84,14 +83,7 @@ fn create_table_task(table_name: Option<&str>) -> CreateTableTask {
.into();

let table_info = build_raw_table_info_from_expr(&expr);
CreateTableTask::new(
expr,
vec![Partition {
column_list: vec![],
value_list: vec![],
}],
table_info,
)
CreateTableTask::new(expr, None, table_info)
}

#[test]

@@ -38,6 +38,7 @@ smallvec.workspace = true
snafu.workspace = true
store-api.workspace = true
tokio.workspace = true
tracing.workspace = true

[dev-dependencies]
common-meta = { workspace = true, features = ["testing"] }

@@ -42,6 +42,7 @@ datafusion-common.workspace = true
datafusion-expr.workspace = true
datatypes.workspace = true
dotenv.workspace = true
either.workspace = true
futures.workspace = true
humantime-serde.workspace = true
index.workspace = true

@@ -75,6 +76,7 @@ table.workspace = true
tokio.workspace = true
tokio-stream.workspace = true
tokio-util.workspace = true
tracing.workspace = true
uuid.workspace = true

[dev-dependencies]

@@ -368,6 +368,7 @@ impl CompactionScheduler {
picker_output: picker_output.clone(),
start_time,
waiters,
ttl,
};

let result = remote_job_scheduler

@@ -20,6 +20,7 @@ use api::v1::region::compact_request;
use common_meta::key::SchemaMetadataManagerRef;
use common_telemetry::{info, warn};
use common_time::TimeToLive;
use either::Either;
use itertools::Itertools;
use object_store::manager::ObjectStoreManagerRef;
use serde::{Deserialize, Serialize};

@@ -116,7 +117,7 @@ pub async fn open_compaction_region(
req: &OpenCompactionRegionRequest,
mito_config: &MitoConfig,
object_store_manager: ObjectStoreManagerRef,
schema_metadata_manager: SchemaMetadataManagerRef,
ttl_provider: Either<TimeToLive, SchemaMetadataManagerRef>,
) -> Result<CompactionRegion> {
let object_store = {
let name = &req.region_options.storage;

@@ -197,16 +198,22 @@ pub async fn open_compaction_region(
}
};

let ttl = find_ttl(
req.region_id.table_id(),
current_version.options.ttl,
&schema_metadata_manager,
)
.await
.unwrap_or_else(|e| {
warn!(e; "Failed to get ttl for region: {}", region_metadata.region_id);
TimeToLive::default()
});
let ttl = match ttl_provider {
// Use the specified ttl.
Either::Left(ttl) => ttl,
// Get the ttl from the schema metadata manager.
Either::Right(schema_metadata_manager) => find_ttl(
req.region_id.table_id(),
current_version.options.ttl,
&schema_metadata_manager,
)
.await
.unwrap_or_else(|e| {
warn!(e; "Failed to get ttl for region: {}", region_metadata.region_id);
TimeToLive::default()
}),
};

Ok(CompactionRegion {
region_id: req.region_id,
region_options: req.region_options.clone(),
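The `Either` parameter above lets callers choose the TTL source: a remote compaction job can pass the TTL it already knows, while the regular open path keeps looking it up through the schema metadata manager. A reduced sketch of that dispatch (types deliberately simplified; this is not the actual mito API):

```rust
use either::Either;

// Either a TTL already resolved by the caller, or a lookup closure standing in
// for SchemaMetadataManagerRef + find_ttl.
fn resolve_ttl(provider: Either<u64, Box<dyn Fn() -> u64>>) -> u64 {
    match provider {
        Either::Left(ttl) => ttl,
        Either::Right(lookup) => lookup(),
    }
}

fn main() {
    assert_eq!(resolve_ttl(Either::Left(3_600)), 3_600);
    assert_eq!(resolve_ttl(Either::Right(Box::new(|| 86_400))), 86_400);
}
```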
||||
@@ -19,6 +19,7 @@ use api::v1::Rows;
|
||||
use common_error::ext::ErrorExt;
|
||||
use common_error::status_code::StatusCode;
|
||||
use common_recordbatch::RecordBatches;
|
||||
use either::Either;
|
||||
use store_api::region_engine::{RegionEngine, RegionRole};
use store_api::region_request::{
RegionCloseRequest, RegionOpenRequest, RegionPutRequest, RegionRequest,
@@ -474,7 +475,7 @@ async fn test_open_compaction_region() {
&req,
&mito_config,
object_store_manager.clone(),
schema_metadata_manager,
Either::Right(schema_metadata_manager),
)
.await
.unwrap();

@@ -694,7 +694,7 @@ mod tests {
let read_format = ReadFormat::new_with_all_columns(metadata.clone());
let mut batches = VecDeque::new();
read_format
.convert_record_batch(&batch, &mut batches)
.convert_record_batch(&batch, None, &mut batches)
.unwrap();
if !dedup {
assert_eq!(

@@ -17,6 +17,7 @@ use std::sync::Arc;
use std::time::Instant;

use common_telemetry::error;
use common_time::TimeToLive;
use serde::{Deserialize, Serialize};
use snafu::{Location, ResultExt, Snafu};
use store_api::storage::RegionId;
@@ -108,6 +109,7 @@ pub struct CompactionJob {
pub compaction_region: CompactionRegion,
pub picker_output: PickerOutput,
pub start_time: Instant,
pub ttl: TimeToLive,
/// Send the result of the compaction job to these waiters.
pub waiters: Vec<OutputTx>,
}

@@ -95,7 +95,7 @@ mod tests {
use datafusion_common::{Column, ScalarValue};
use datafusion_expr::{col, lit, BinaryExpr, Expr, Operator};
use datatypes::arrow;
use datatypes::arrow::array::RecordBatch;
use datatypes::arrow::array::{RecordBatch, UInt64Array};
use datatypes::arrow::datatypes::{DataType, Field, Schema};
use parquet::arrow::AsyncArrowWriter;
use parquet::basic::{Compression, Encoding, ZstdLevel};
@@ -107,7 +107,7 @@ mod tests {
use super::*;
use crate::access_layer::{FilePathProvider, OperationType, RegionFilePathFactory};
use crate::cache::{CacheManager, CacheStrategy, PageKey};
use crate::read::BatchReader;
use crate::read::{BatchBuilder, BatchReader};
use crate::region::options::{IndexOptions, InvertedIndexOptions};
use crate::sst::file::{FileHandle, FileMeta};
use crate::sst::file_purger::NoopFilePurger;
@@ -120,8 +120,8 @@ mod tests {
use crate::sst::{location, DEFAULT_WRITE_CONCURRENCY};
use crate::test_util::sst_util::{
assert_parquet_metadata_eq, build_test_binary_test_region_metadata, new_batch_by_range,
new_batch_with_binary, new_source, sst_file_handle, sst_file_handle_with_file_id,
sst_region_metadata,
new_batch_with_binary, new_batch_with_custom_sequence, new_source, sst_file_handle,
sst_file_handle_with_file_id, sst_region_metadata,
};
use crate::test_util::{check_reader_result, TestEnv};

@@ -895,4 +895,84 @@ mod tests {
assert!(cached.contains_row_group(2));
assert!(cached.contains_row_group(3));
}

#[tokio::test]
async fn test_read_with_override_sequence() {
let mut env = TestEnv::new().await;
let object_store = env.init_object_store_manager();
let handle = sst_file_handle(0, 1000);
let file_path = FixedPathProvider {
file_id: handle.file_id(),
};
let metadata = Arc::new(sst_region_metadata());

// Create batches with sequence 0 to trigger override functionality
let batch1 = new_batch_with_custom_sequence(&["a", "d"], 0, 60, 0);
let batch2 = new_batch_with_custom_sequence(&["b", "f"], 0, 40, 0);
let source = new_source(&[batch1, batch2]);

let write_opts = WriteOptions {
row_group_size: 50,
..Default::default()
};

let mut writer = ParquetWriter::new_with_object_store(
object_store.clone(),
metadata.clone(),
NoopIndexBuilder,
file_path,
)
.await;

writer
.write_all(source, None, &write_opts)
.await
.unwrap()
.remove(0);

// Read without override sequence (should read sequence 0)
let builder =
ParquetReaderBuilder::new(FILE_DIR.to_string(), handle.clone(), object_store.clone());
let mut reader = builder.build().await.unwrap();
let mut normal_batches = Vec::new();
while let Some(batch) = reader.next_batch().await.unwrap() {
normal_batches.push(batch);
}

// Read with override sequence using FileMeta.sequence
let custom_sequence = 12345u64;
let file_meta = handle.meta_ref();
let mut override_file_meta = file_meta.clone();
override_file_meta.sequence = Some(std::num::NonZero::new(custom_sequence).unwrap());
let override_handle = FileHandle::new(
override_file_meta,
Arc::new(crate::sst::file_purger::NoopFilePurger),
);

let builder =
ParquetReaderBuilder::new(FILE_DIR.to_string(), override_handle, object_store.clone());
let mut reader = builder.build().await.unwrap();
let mut override_batches = Vec::new();
while let Some(batch) = reader.next_batch().await.unwrap() {
override_batches.push(batch);
}

// Compare the results
assert_eq!(normal_batches.len(), override_batches.len());
for (normal, override_batch) in normal_batches.into_iter().zip(override_batches.iter()) {
// Create expected batch with override sequence
let expected_batch = {
let num_rows = normal.num_rows();
let mut builder = BatchBuilder::from(normal);
builder
.sequences_array(Arc::new(UInt64Array::from_value(custom_sequence, num_rows)))
.unwrap();

builder.build().unwrap()
};

// Override batch should match expected batch
assert_eq!(*override_batch, expected_batch);
}
}
}

@@ -146,6 +146,8 @@ pub struct ReadFormat {
/// Field column id to their index in the projected schema (
/// the schema of [Batch]).
field_id_to_projected_index: HashMap<ColumnId, usize>,
/// Sequence number to override the sequence read from the SST.
override_sequence: Option<SequenceNumber>,
}

impl ReadFormat {
@@ -197,9 +199,15 @@ impl ReadFormat {
field_id_to_index,
projection_indices,
field_id_to_projected_index,
override_sequence: None,
}
}

/// Sets the sequence number to override.
pub(crate) fn set_override_sequence(&mut self, sequence: Option<SequenceNumber>) {
self.override_sequence = sequence;
}

/// Gets the arrow schema of the SST file.
///
/// This schema is computed from the region metadata but should be the same
@@ -218,12 +226,20 @@ impl ReadFormat {
&self.projection_indices
}

/// Creates a sequence array to override.
pub(crate) fn new_override_sequence_array(&self, length: usize) -> Option<ArrayRef> {
self.override_sequence
.map(|seq| Arc::new(UInt64Array::from_value(seq, length)) as ArrayRef)
}

/// Convert a arrow record batch into `batches`.
///
/// The length of `override_sequence_array` must be larger than the length of the record batch.
/// Note that the `record_batch` may only contains a subset of columns if it is projected.
pub fn convert_record_batch(
&self,
record_batch: &RecordBatch,
override_sequence_array: Option<&ArrayRef>,
batches: &mut VecDeque<Batch>,
) -> Result<()> {
debug_assert!(batches.is_empty());
@@ -246,11 +262,23 @@ impl ReadFormat {
.take(FIXED_POS_COLUMN_NUM);
// Safety: We have checked the column number.
let op_type_array = fixed_pos_columns.next().unwrap();
let sequence_array = fixed_pos_columns.next().unwrap();
let mut sequence_array = fixed_pos_columns.next().unwrap().clone();
let pk_array = fixed_pos_columns.next().unwrap();
let ts_array = fixed_pos_columns.next().unwrap();
let field_batch_columns = self.get_field_batch_columns(record_batch)?;

// Override sequence array if provided.
if let Some(override_array) = override_sequence_array {
assert!(override_array.len() >= sequence_array.len());
// It's fine to assign the override array directly, but we slice it to make
// sure it matches the length of the original sequence array.
sequence_array = if override_array.len() > sequence_array.len() {
override_array.slice(0, sequence_array.len())
} else {
override_array.clone()
};
}

// Compute primary key offsets.
let pk_dict_array = pk_array
.as_any()
@@ -691,6 +719,39 @@ pub(crate) fn parquet_row_group_time_range(
Some((Timestamp::new(min, unit), Timestamp::new(max, unit)))
}

/// Checks if sequence override is needed based on all row groups' statistics.
/// Returns true if ALL row groups have sequence min-max values of 0.
pub(crate) fn need_override_sequence(parquet_meta: &ParquetMetaData) -> bool {
let num_columns = parquet_meta.file_metadata().schema_descr().num_columns();
if num_columns < FIXED_POS_COLUMN_NUM {
return false;
}

// The sequence column is the second-to-last column (before op_type)
let sequence_pos = num_columns - 2;

// Check all row groups - all must have sequence min-max of 0
for row_group in parquet_meta.row_groups() {
if let Some(Statistics::Int64(value_stats)) = row_group.column(sequence_pos).statistics() {
if let (Some(min_val), Some(max_val)) = (value_stats.min_opt(), value_stats.max_opt()) {
// If any row group doesn't have min=0 and max=0, return false
if *min_val != 0 || *max_val != 0 {
return false;
}
} else {
// If any row group doesn't have statistics, return false
return false;
}
} else {
// If any row group doesn't have Int64 statistics, return false
return false;
}
}

// All row groups have sequence min-max of 0, or there are no row groups
!parquet_meta.row_groups().is_empty()
}

#[cfg(test)]
mod tests {
use api::v1::OpType;
@@ -775,9 +836,19 @@ mod tests {
}

fn new_batch(primary_key: &[u8], start_ts: i64, start_field: i64, num_rows: usize) -> Batch {
new_batch_with_sequence(primary_key, start_ts, start_field, num_rows, TEST_SEQUENCE)
}

fn new_batch_with_sequence(
primary_key: &[u8],
start_ts: i64,
start_field: i64,
num_rows: usize,
sequence: u64,
) -> Batch {
let ts_values = (0..num_rows).map(|i| start_ts + i as i64);
let timestamps = Arc::new(TimestampMillisecondVector::from_values(ts_values));
let sequences = Arc::new(UInt64Vector::from_vec(vec![TEST_SEQUENCE; num_rows]));
let sequences = Arc::new(UInt64Vector::from_vec(vec![sequence; num_rows]));
let op_types = Arc::new(UInt8Vector::from_vec(vec![TEST_OP_TYPE; num_rows]));
let fields = vec![
BatchColumn {
@@ -930,7 +1001,7 @@ mod tests {
let record_batch = RecordBatch::new_empty(arrow_schema);
let mut batches = VecDeque::new();
read_format
.convert_record_batch(&record_batch, &mut batches)
.convert_record_batch(&record_batch, None, &mut batches)
.unwrap();
assert!(batches.is_empty());
}
@@ -957,7 +1028,7 @@ mod tests {
let record_batch = RecordBatch::try_new(arrow_schema, columns).unwrap();
let mut batches = VecDeque::new();
read_format
.convert_record_batch(&record_batch, &mut batches)
.convert_record_batch(&record_batch, None, &mut batches)
.unwrap();

assert_eq!(
@@ -965,4 +1036,45 @@ mod tests {
batches.into_iter().collect::<Vec<_>>(),
);
}

#[test]
fn test_convert_record_batch_with_override_sequence() {
let metadata = build_test_region_metadata();
let column_ids: Vec<_> = metadata
.column_metadatas
.iter()
.map(|col| col.column_id)
.collect();
let read_format = ReadFormat::new(metadata, column_ids.iter().copied());

let columns: Vec<ArrayRef> = vec![
Arc::new(Int64Array::from(vec![1, 1, 10, 10])), // field1
Arc::new(Int64Array::from(vec![2, 2, 11, 11])), // field0
Arc::new(TimestampMillisecondArray::from(vec![1, 2, 11, 12])), // ts
build_test_pk_array(&[(b"one".to_vec(), 2), (b"two".to_vec(), 2)]), // primary key
Arc::new(UInt64Array::from(vec![TEST_SEQUENCE; 4])), // sequence
Arc::new(UInt8Array::from(vec![TEST_OP_TYPE; 4])), // op type
];
let arrow_schema = build_test_arrow_schema();
let record_batch = RecordBatch::try_new(arrow_schema, columns).unwrap();

// Create override sequence array with custom values
let override_sequence: u64 = 12345;
let override_sequence_array: ArrayRef =
Arc::new(UInt64Array::from_value(override_sequence, 4));

let mut batches = VecDeque::new();
read_format
.convert_record_batch(&record_batch, Some(&override_sequence_array), &mut batches)
.unwrap();

// Create expected batches with override sequence
let expected_batch1 = new_batch_with_sequence(b"one", 1, 1, 2, override_sequence);
let expected_batch2 = new_batch_with_sequence(b"two", 11, 10, 2, override_sequence);

assert_eq!(
vec![expected_batch1, expected_batch2],
batches.into_iter().collect::<Vec<_>>(),
);
}
}

@@ -23,6 +23,7 @@ use async_trait::async_trait;
use common_recordbatch::filter::SimpleFilterEvaluator;
use common_telemetry::{debug, warn};
use datafusion_expr::Expr;
use datatypes::arrow::array::ArrayRef;
use datatypes::arrow::error::ArrowError;
use datatypes::arrow::record_batch::RecordBatch;
use datatypes::data_type::ConcreteDataType;
@@ -54,7 +55,7 @@ use crate::sst::index::bloom_filter::applier::BloomFilterIndexApplierRef;
use crate::sst::index::fulltext_index::applier::FulltextIndexApplierRef;
use crate::sst::index::inverted_index::applier::InvertedIndexApplierRef;
use crate::sst::parquet::file_range::{FileRangeContext, FileRangeContextRef};
use crate::sst::parquet::format::ReadFormat;
use crate::sst::parquet::format::{need_override_sequence, ReadFormat};
use crate::sst::parquet::metadata::MetadataLoader;
use crate::sst::parquet::row_group::InMemoryRowGroup;
use crate::sst::parquet::row_selection::RowGroupSelection;
@@ -220,7 +221,7 @@ impl ParquetReaderBuilder {
let key_value_meta = parquet_meta.file_metadata().key_value_metadata();
// Gets the metadata stored in the SST.
let region_meta = Arc::new(Self::get_region_metadata(&file_path, key_value_meta)?);
let read_format = if let Some(column_ids) = &self.projection {
let mut read_format = if let Some(column_ids) = &self.projection {
ReadFormat::new(region_meta.clone(), column_ids.iter().copied())
} else {
// Lists all column ids to read, we always use the expected metadata if possible.
@@ -233,6 +234,10 @@ impl ParquetReaderBuilder {
.map(|col| col.column_id),
)
};
if need_override_sequence(&parquet_meta) {
read_format
.set_override_sequence(self.file_handle.meta_ref().sequence.map(|x| x.get()));
}

// Computes the projection mask.
let parquet_schema_desc = parquet_meta.file_metadata().schema_descr();
@@ -1230,12 +1235,7 @@ pub(crate) type RowGroupReader = RowGroupReaderBase<FileRangeContextRef>;
impl RowGroupReader {
/// Creates a new reader from file range.
pub(crate) fn new(context: FileRangeContextRef, reader: ParquetRecordBatchReader) -> Self {
Self {
context,
reader,
batches: VecDeque::new(),
metrics: ReaderMetrics::default(),
}
Self::create(context, reader)
}
}

@@ -1249,6 +1249,8 @@ pub(crate) struct RowGroupReaderBase<T> {
batches: VecDeque<Batch>,
/// Local scan metrics.
metrics: ReaderMetrics,
/// Cached sequence array to override sequences.
override_sequence: Option<ArrayRef>,
}

impl<T> RowGroupReaderBase<T>
@@ -1257,11 +1259,16 @@ where
{
/// Creates a new reader.
pub(crate) fn create(context: T, reader: ParquetRecordBatchReader) -> Self {
// The batch length from the reader should be less than or equal to DEFAULT_READ_BATCH_SIZE.
let override_sequence = context
.read_format()
.new_override_sequence_array(DEFAULT_READ_BATCH_SIZE);
Self {
context,
reader,
batches: VecDeque::new(),
metrics: ReaderMetrics::default(),
override_sequence,
}
}

@@ -1297,9 +1304,11 @@ where
};
self.metrics.num_record_batches += 1;

self.context
.read_format()
.convert_record_batch(&record_batch, &mut self.batches)?;
self.context.read_format().convert_record_batch(
&record_batch,
self.override_sequence.as_ref(),
&mut self.batches,
)?;
self.metrics.num_batches += self.batches.len();
}
let batch = self.batches.pop_front();

@@ -72,7 +72,7 @@ use crate::error::Result;
use crate::flush::{WriteBufferManager, WriteBufferManagerRef};
use crate::manifest::manager::{RegionManifestManager, RegionManifestOptions};
use crate::read::{Batch, BatchBuilder, BatchReader};
use crate::sst::file_purger::{FilePurger, FilePurgerRef, NoopFilePurger, PurgeRequest};
use crate::sst::file_purger::{FilePurgerRef, NoopFilePurger};
use crate::sst::index::intermediate::IntermediateManager;
use crate::sst::index::puffin_manager::PuffinManagerFactory;
use crate::time_provider::{StdTimeProvider, TimeProviderRef};

@@ -138,11 +138,17 @@ pub fn sst_file_handle(start_ms: i64, end_ms: i64) -> FileHandle {
sst_file_handle_with_file_id(FileId::random(), start_ms, end_ms)
}

pub fn new_batch_by_range(tags: &[&str], start: usize, end: usize) -> Batch {
/// Creates a new batch with custom sequence for testing.
pub fn new_batch_with_custom_sequence(
tags: &[&str],
start: usize,
end: usize,
sequence: u64,
) -> Batch {
assert!(end >= start);
let pk = new_primary_key(tags);
let timestamps: Vec<_> = (start..end).map(|v| v as i64).collect();
let sequences = vec![1000; end - start];
let sequences = vec![sequence; end - start];
let op_types = vec![OpType::Put; end - start];
let field: Vec<_> = (start..end).map(|v| v as u64).collect();
new_batch_builder(&pk, &timestamps, &sequences, &op_types, 2, &field)
@@ -150,6 +156,10 @@ pub fn new_batch_by_range(tags: &[&str], start: usize, end: usize) -> Batch {
.unwrap()
}

pub fn new_batch_by_range(tags: &[&str], start: usize, end: usize) -> Batch {
new_batch_with_custom_sequence(tags, start, end, 1000)
}

pub fn new_batch_with_binary(tags: &[&str], start: usize, end: usize) -> Batch {
assert!(end >= start);
let pk = new_primary_key(tags);

@@ -69,6 +69,7 @@ table.workspace = true
tokio.workspace = true
tokio-util.workspace = true
tonic.workspace = true
tracing.workspace = true

[dev-dependencies]
common-meta = { workspace = true, features = ["testing"] }

@@ -654,7 +654,7 @@ impl StatementExecutor {
ctx.clone(),
)?;

//TODO(dennis): validate the logical plan
// TODO(dennis): validate the logical plan
self.create_view_by_expr(expr, ctx).await
}

@@ -1389,12 +1389,11 @@ impl StatementExecutor {
async fn create_table_procedure(
&self,
create_table: CreateTableExpr,
partitions: Vec<Partition>,
partitions: Option<Partition>,
table_info: RawTableInfo,
query_context: QueryContextRef,
) -> Result<SubmitDdlTaskResponse> {
let partitions = partitions.into_iter().map(Into::into).collect();

let partitions = partitions.map(|p| p.into()); // to PbPartition
let request = SubmitDdlTaskRequest {
query_context,
task: DdlTask::new_create_table(create_table, partitions, table_info),
@@ -1590,7 +1589,7 @@ fn parse_partitions(
create_table: &CreateTableExpr,
partitions: Option<Partitions>,
query_ctx: &QueryContextRef,
) -> Result<(Vec<MetaPartition>, Vec<String>)> {
) -> Result<(Option<MetaPartition>, Vec<String>)> {
// If partitions are not defined by user, use the timestamp column (which has to be existed) as
// the partition column, and create only one partition.
let partition_columns = find_partition_columns(&partitions)?;
@@ -1600,23 +1599,26 @@ fn parse_partitions(
// Validates partition
let mut exprs = vec![];
for partition in &partition_entries {
for bound in partition {
if let PartitionBound::Expr(expr) = bound {
exprs.push(expr.clone());
}
if let PartitionBound::Expr(expr) = partition {
exprs.push(expr.clone());
}
}
MultiDimPartitionRule::try_new(partition_columns.clone(), vec![], exprs, true)
.context(InvalidPartitionSnafu)?;

Ok((
partition_entries
.into_iter()
.map(|x| MetaPartition::try_from(PartitionDef::new(partition_columns.clone(), x)))
.collect::<std::result::Result<_, _>>()
let meta_partition = if partition_entries.is_empty() {
None
} else {
Some(
MetaPartition::try_from(PartitionDef::new(
partition_columns.clone(),
partition_entries,
))
.context(DeserializePartitionSnafu)?,
partition_columns,
))
)
};

Ok((meta_partition, partition_columns))
}

fn create_table_info(
@@ -1727,7 +1729,7 @@ fn find_partition_entries(
partitions: &Option<Partitions>,
partition_columns: &[String],
query_ctx: &QueryContextRef,
) -> Result<Vec<Vec<PartitionBound>>> {
) -> Result<Vec<PartitionBound>> {
let entries = if let Some(partitions) = partitions {
// extract concrete data type of partition columns
let column_defs = partition_columns
@@ -1756,17 +1758,17 @@ fn find_partition_entries(
for partition in &partitions.exprs {
let partition_expr =
convert_one_expr(partition, &column_name_and_type, &query_ctx.timezone())?;
partition_exprs.push(vec![PartitionBound::Expr(partition_expr)]);
partition_exprs.push(PartitionBound::Expr(partition_expr));
}

// fallback for no expr
if partition_exprs.is_empty() {
partition_exprs.push(vec![PartitionBound::MaxValue]);
partition_exprs.push(PartitionBound::MaxValue);
}

partition_exprs
} else {
vec![vec![PartitionBound::MaxValue]]
vec![PartitionBound::MaxValue]
};
Ok(entries)
}

@@ -48,17 +48,21 @@ pub trait PartitionRule: Sync + Send {
) -> Result<HashMap<RegionNumber, RegionMask>>;
}

/// The right bound(exclusive) of partition range.
/// The bound of one partition.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
pub enum PartitionBound {
/// Deprecated since 0.9.0.
Value(Value),
/// Deprecated since 0.15.0.
MaxValue,
Expr(crate::expr::PartitionExpr),
}

/// The partition definition of one table.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PartitionDef {
partition_columns: Vec<String>,
/// Each element represents one partition.
partition_bounds: Vec<PartitionBound>,
}


@@ -47,6 +47,7 @@ lazy_static.workspace = true
moka = { workspace = true, features = ["sync"] }
once_cell.workspace = true
operator.workspace = true
ordered-float.workspace = true
paste.workspace = true
prometheus.workspace = true
query.workspace = true
@@ -59,7 +60,7 @@ sql.workspace = true
table.workspace = true
tokio.workspace = true
urlencoding = "2.1"
vrl = "0.24"
vrl.workspace = true
yaml-rust = "0.4"

[dev-dependencies]

@@ -16,23 +16,21 @@ use std::sync::Arc;

use criterion::{black_box, criterion_group, criterion_main, Criterion};
use pipeline::error::Result;
use pipeline::{
json_to_map, parse, setup_pipeline, Content, Pipeline, PipelineContext, SchemaInfo,
};
use serde_json::{Deserializer, Value};
use pipeline::{parse, setup_pipeline, Content, Pipeline, PipelineContext, SchemaInfo};
use serde_json::Deserializer;
use vrl::value::Value as VrlValue;

fn processor_mut(
pipeline: Arc<Pipeline>,
pipeline_ctx: &PipelineContext<'_>,
schema_info: &mut SchemaInfo,
input_values: Vec<Value>,
input_values: Vec<VrlValue>,
) -> Result<Vec<greptime_proto::v1::Row>> {
let mut result = Vec::with_capacity(input_values.len());

for v in input_values {
let payload = json_to_map(v).unwrap();
let r = pipeline
.exec_mut(payload, pipeline_ctx, schema_info)?
.exec_mut(v, pipeline_ctx, schema_info)?
.into_transformed()
.expect("expect transformed result ");
result.push(r.0);
@@ -237,7 +235,7 @@ transform:
fn criterion_benchmark(c: &mut Criterion) {
let input_value_str = include_str!("./data.log");
let input_value = Deserializer::from_str(input_value_str)
.into_iter::<serde_json::Value>()
.into_iter::<VrlValue>()
.collect::<std::result::Result<Vec<_>, _>>()
.unwrap();
let pipeline = prepare_pipeline();

@@ -14,6 +14,7 @@

use common_telemetry::debug;
use snafu::OptionExt;
use vrl::value::Value as VrlValue;
use yaml_rust::Yaml;

use crate::error::{
@@ -21,7 +22,7 @@ use crate::error::{
ValueRequiredForDispatcherRuleSnafu,
};
use crate::etl::ctx_req::TABLE_SUFFIX_KEY;
use crate::Value;
use crate::etl::value::yaml_to_vrl_value;

const FIELD: &str = "field";
const PIPELINE: &str = "pipeline";
@@ -62,7 +63,7 @@ pub(crate) struct Dispatcher {
/// name
#[derive(Debug, PartialEq)]
pub(crate) struct Rule {
pub value: Value,
pub value: VrlValue,
pub table_suffix: String,
pub pipeline: Option<String>,
}
@@ -90,7 +91,8 @@ impl TryFrom<&Yaml> for Dispatcher {
if rule[VALUE].is_badvalue() {
ValueRequiredForDispatcherRuleSnafu.fail()?;
}
let value = Value::try_from(&rule[VALUE])?;

let value = yaml_to_vrl_value(&rule[VALUE])?;

Ok(Rule {
value,
@@ -109,8 +111,9 @@ impl TryFrom<&Yaml> for Dispatcher {

impl Dispatcher {
/// execute dispatcher and returns matched rule if any
pub(crate) fn exec(&self, data: &Value) -> Option<&Rule> {
if let Some(value) = data.get(&self.field) {
pub(crate) fn exec(&self, data: &VrlValue) -> Option<&Rule> {
let data = data.as_object()?;
if let Some(value) = data.get(self.field.as_str()) {
for rule in &self.rules {
if rule.value == *value {
return Some(rule);

@@ -62,7 +62,7 @@ pub enum Error {
#[snafu(display("Processor {processor}: expect string value, but got {v:?}"))]
ProcessorExpectString {
processor: String,
v: crate::Value,
v: vrl::value::Value,
#[snafu(implicit)]
location: Location,
},
@@ -229,12 +229,6 @@ pub enum Error {
location: Location,
},

#[snafu(display("Failed to get timestamp"))]
DateFailedToGetTimestamp {
#[snafu(implicit)]
location: Location,
},

#[snafu(display("Invalid Pattern: '{s}'. {detail}"))]
DissectInvalidPattern {
s: String,
@@ -372,13 +366,6 @@ pub enum Error {
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Url decoding error"))]
UrlEncodingDecode {
#[snafu(source)]
error: std::string::FromUtf8Error,
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Invalid transform on_failure value: {value}"))]
TransformOnFailureInvalidValue {
value: String,
@@ -433,17 +420,6 @@ pub enum Error {
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Null type not supported"))]
CoerceUnsupportedNullType {
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Null type not supported when to coerce '{ty}' type"))]
CoerceUnsupportedNullTypeTo {
ty: String,
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Type: {ty} value not supported for Epoch"))]
CoerceUnsupportedEpochType {
ty: String,
@@ -556,12 +532,6 @@ pub enum Error {
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Input value must be an object"))]
InputValueMustBeObject {
#[snafu(implicit)]
location: Location,
},

#[snafu(display("Column options error"))]
ColumnOptions {
#[snafu(source)]
@@ -575,12 +545,6 @@ pub enum Error {
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Unsupported number type: {value:?}"))]
UnsupportedNumberType {
value: serde_json::Number,
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Failed to parse json"))]
JsonParse {
#[snafu(source)]
@@ -694,14 +658,6 @@ pub enum Error {
#[snafu(implicit)]
location: Location,
},

#[snafu(display("Float is not a number: {}", input_float))]
FloatNaN {
input_float: f64,
#[snafu(implicit)]
location: Location,
},

#[snafu(display("Invalid timestamp value: {}", input))]
InvalidTimestamp {
input: String,
@@ -709,14 +665,13 @@ pub enum Error {
location: Location,
},

#[snafu(display("Failed to convert bytes to utf8"))]
BytesToUtf8 {
#[snafu(source)]
error: std::string::FromUtf8Error,
#[snafu(display("Invalid epoch value '{}' for resolution '{}'", value, resolution))]
InvalidEpochForResolution {
value: i64,
resolution: String,
#[snafu(implicit)]
location: Location,
},

#[snafu(display("Please don't use regex in Vrl script"))]
VrlRegexValue {
#[snafu(implicit)]
@@ -808,6 +763,21 @@ pub enum Error {
#[snafu(implicit)]
location: Location,
},

#[snafu(display("Float is NaN"))]
FloatIsNan {
#[snafu(source)]
error: ordered_float::FloatIsNan,
#[snafu(implicit)]
location: Location,
},

#[snafu(display("Unsupported type in pipeline: {}", ty))]
UnsupportedTypeInPipeline {
ty: String,
#[snafu(implicit)]
location: Location,
},
}

pub type Result<T> = std::result::Result<T, Error>;
@@ -858,7 +828,6 @@ impl ErrorExt for Error {
| DateParseTimezone { .. }
| DateParse { .. }
| DateFailedToGetLocalTimezone { .. }
| DateFailedToGetTimestamp { .. }
| DissectInvalidPattern { .. }
| DissectEmptyPattern { .. }
| DissectSplitExceedsInput { .. }
@@ -881,7 +850,6 @@ impl ErrorExt for Error {
| RegexNoValidPattern { .. }
| UrlEncodingInvalidMethod { .. }
| DigestPatternInvalid { .. }
| UrlEncodingDecode { .. }
| TransformOnFailureInvalidValue { .. }
| TransformElementMustBeMap { .. }
| TransformFieldMustBeSet { .. }
@@ -891,8 +859,6 @@ impl ErrorExt for Error {
| TransformTimestampIndexCount { .. }
| AutoTransformOneTimestamp { .. }
| InvalidVersionNumber { .. }
| CoerceUnsupportedNullType { .. }
| CoerceUnsupportedNullTypeTo { .. }
| CoerceUnsupportedEpochType { .. }
| CoerceStringToType { .. }
| CoerceJsonTypeTo { .. }
@@ -908,10 +874,8 @@ impl ErrorExt for Error {
| ValueYamlKeyMustBeString { .. }
| YamlLoad { .. }
| YamlParse { .. }
| InputValueMustBeObject { .. }
| ColumnOptions { .. }
| UnsupportedIndexType { .. }
| UnsupportedNumberType { .. }
| IdentifyPipelineColumnTypeMismatch { .. }
| JsonParse { .. }
| JsonPathParse { .. }
@@ -924,12 +888,14 @@ impl ErrorExt for Error {
| InvalidTableSuffixTemplate { .. }
| CompileVrl { .. }
| ExecuteVrl { .. }
| FloatNaN { .. }
| BytesToUtf8 { .. }
| InvalidTimestamp { .. }
| VrlRegexValue { .. }
| VrlReturnValue { .. }
| PipelineMissing { .. } => StatusCode::InvalidArguments,

FloatIsNan { .. }
| InvalidEpochForResolution { .. }
| UnsupportedTypeInPipeline { .. } => StatusCode::InvalidArguments,
}
}


@@ -19,21 +19,19 @@ pub mod processor;
pub mod transform;
pub mod value;

use std::collections::BTreeMap;

use api::v1::Row;
use common_time::timestamp::TimeUnit;
use itertools::Itertools;
use processor::{Processor, Processors};
use snafu::{ensure, OptionExt, ResultExt};
use transform::Transforms;
use value::Value;
use vrl::core::Value as VrlValue;
use yaml_rust::{Yaml, YamlLoader};

use crate::dispatcher::{Dispatcher, Rule};
use crate::error::{
AutoTransformOneTimestampSnafu, Error, InputValueMustBeObjectSnafu, IntermediateKeyIndexSnafu,
InvalidVersionNumberSnafu, Result, YamlLoadSnafu, YamlParseSnafu,
AutoTransformOneTimestampSnafu, Error, IntermediateKeyIndexSnafu, InvalidVersionNumberSnafu,
Result, YamlLoadSnafu, YamlParseSnafu,
};
use crate::etl::processor::ProcessorKind;
use crate::etl::transform::transformer::greptime::values_to_row;
@@ -228,7 +226,7 @@ impl DispatchedTo {
#[derive(Debug)]
pub enum PipelineExecOutput {
Transformed(TransformedOutput),
DispatchedTo(DispatchedTo, Value),
DispatchedTo(DispatchedTo, VrlValue),
}

#[derive(Debug)]
@@ -261,40 +259,6 @@ impl PipelineExecOutput {
}
}

pub fn json_to_map(val: serde_json::Value) -> Result<Value> {
match val {
serde_json::Value::Object(map) => {
let mut intermediate_state = BTreeMap::new();
for (k, v) in map {
intermediate_state.insert(k, Value::try_from(v)?);
}
Ok(Value::Map(intermediate_state.into()))
}
_ => InputValueMustBeObjectSnafu.fail(),
}
}

pub fn json_array_to_map(val: Vec<serde_json::Value>) -> Result<Vec<Value>> {
val.into_iter().map(json_to_map).collect()
}

pub fn simd_json_to_map(val: simd_json::OwnedValue) -> Result<Value> {
match val {
simd_json::OwnedValue::Object(map) => {
let mut intermediate_state = BTreeMap::new();
for (k, v) in map.into_iter() {
intermediate_state.insert(k, Value::try_from(v)?);
}
Ok(Value::Map(intermediate_state.into()))
}
_ => InputValueMustBeObjectSnafu.fail(),
}
}

pub fn simd_json_array_to_map(val: Vec<simd_json::OwnedValue>) -> Result<Vec<Value>> {
val.into_iter().map(simd_json_to_map).collect()
}

impl Pipeline {
fn is_v1(&self) -> bool {
self.doc_version == PipelineDocVersion::V1
@@ -302,7 +266,7 @@ impl Pipeline {

pub fn exec_mut(
&self,
mut val: Value,
mut val: VrlValue,
pipeline_ctx: &PipelineContext<'_>,
schema_info: &mut SchemaInfo,
) -> Result<PipelineExecOutput> {
@@ -333,9 +297,9 @@ impl Pipeline {
table_suffix,
}));
}
// continue v2 process, check ts column and set the rest fields with auto-transform
// continue v2 process, and set the rest fields with auto-transform
// if transformer presents, then ts has been set
values_to_row(schema_info, val, pipeline_ctx, Some(values))?
values_to_row(schema_info, val, pipeline_ctx, Some(values), false)?
}
TransformerMode::AutoTransform(ts_name, time_unit) => {
// infer ts from the context
@@ -347,7 +311,7 @@ impl Pipeline {
));
let n_ctx =
PipelineContext::new(&def, pipeline_ctx.pipeline_param, pipeline_ctx.channel);
values_to_row(schema_info, val, &n_ctx, None)?
values_to_row(schema_info, val, &n_ctx, None, true)?
}
};

@@ -409,11 +373,14 @@ macro_rules! setup_pipeline {
}
#[cfg(test)]
mod tests {
use std::collections::BTreeMap;
use std::sync::Arc;

use api::v1::Rows;
use greptime_proto::v1::value::ValueData;
use greptime_proto::v1::{self, ColumnDataType, SemanticType};
use vrl::prelude::Bytes;
use vrl::value::KeyString;

use super::*;

@@ -454,7 +421,7 @@ transform:
session::context::Channel::Unknown,
);

let payload = json_to_map(input_value).unwrap();
let payload = input_value.into();
let result = pipeline
.exec_mut(payload, &pipeline_ctx, &mut schema_info)
.unwrap()
@@ -515,9 +482,10 @@ transform:
&pipeline_param,
session::context::Channel::Unknown,
);
let mut payload = BTreeMap::new();
payload.insert("message".to_string(), Value::String(message));
let payload = Value::Map(payload.into());
let payload = VrlValue::Object(BTreeMap::from([(
KeyString::from("message"),
VrlValue::Bytes(Bytes::from(message)),
)]));

let result = pipeline
.exec_mut(payload, &pipeline_ctx, &mut schema_info)
@@ -613,7 +581,7 @@ transform:
session::context::Channel::Unknown,
);

let payload = json_to_map(input_value).unwrap();
let payload = input_value.into();
let result = pipeline
.exec_mut(payload, &pipeline_ctx, &mut schema_info)
.unwrap()
@@ -666,7 +634,7 @@ transform:
session::context::Channel::Unknown,
);
let schema = pipeline.schemas().unwrap().clone();
let result = json_to_map(input_value).unwrap();
let result = input_value.into();

let row = pipeline
.exec_mut(result, &pipeline_ctx, &mut schema_info)
@@ -732,7 +700,7 @@ transform:
assert_eq!(
dispatcher.rules[0],
crate::dispatcher::Rule {
value: Value::String("http".to_string()),
value: VrlValue::Bytes(Bytes::from("http")),
table_suffix: "http_events".to_string(),
pipeline: None
}
@@ -741,7 +709,7 @@ transform:
assert_eq!(
dispatcher.rules[1],
crate::dispatcher::Rule {
value: Value::String("database".to_string()),
value: VrlValue::Bytes(Bytes::from("database")),
table_suffix: "db_events".to_string(),
pipeline: Some("database_pipeline".to_string()),
}

@@ -19,10 +19,10 @@ use ahash::{HashMap, HashMapExt};
use api::v1::{RowInsertRequest, RowInsertRequests, Rows};
use session::context::{QueryContext, QueryContextRef};
use snafu::OptionExt;
use vrl::value::Value as VrlValue;

use crate::error::{Result, ValueMustBeMapSnafu};
use crate::tablesuffix::TableSuffixTemplate;
use crate::Value;

const GREPTIME_AUTO_CREATE_TABLE: &str = "greptime_auto_create_table";
const GREPTIME_TTL: &str = "greptime_ttl";
@@ -86,32 +86,34 @@ impl ContextOpt {
}

impl ContextOpt {
pub fn from_pipeline_map_to_opt(pipeline_map: &mut Value) -> Result<Self> {
let pipeline_map = pipeline_map.as_map_mut().context(ValueMustBeMapSnafu)?;
pub fn from_pipeline_map_to_opt(value: &mut VrlValue) -> Result<Self> {
let map = value.as_object_mut().context(ValueMustBeMapSnafu)?;

let mut opt = Self::default();
for k in PIPELINE_HINT_KEYS {
if let Some(v) = pipeline_map.remove(k) {
if let Some(v) = map.remove(k) {
let v = v.to_string_lossy().to_string();
match k {
GREPTIME_AUTO_CREATE_TABLE => {
opt.auto_create_table = Some(v.to_str_value());
opt.auto_create_table = Some(v);
}
GREPTIME_TTL => {
opt.ttl = Some(v.to_str_value());
opt.ttl = Some(v);
}
GREPTIME_APPEND_MODE => {
opt.append_mode = Some(v.to_str_value());
opt.append_mode = Some(v);
}
GREPTIME_MERGE_MODE => {
opt.merge_mode = Some(v.to_str_value());
opt.merge_mode = Some(v);
}
GREPTIME_PHYSICAL_TABLE => {
opt.physical_table = Some(v.to_str_value());
opt.physical_table = Some(v);
}
GREPTIME_SKIP_WAL => {
opt.skip_wal = Some(v.to_str_value());
opt.skip_wal = Some(v);
}
GREPTIME_TABLE_SUFFIX => {
opt.table_suffix = Some(v.to_str_value());
opt.table_suffix = Some(v);
}
_ => {}
}
@@ -123,7 +125,7 @@ impl ContextOpt {
pub(crate) fn resolve_table_suffix(
&mut self,
table_suffix: Option<&TableSuffixTemplate>,
pipeline_map: &Value,
pipeline_map: &VrlValue,
) -> Option<String> {
self.table_suffix
.take()

@@ -28,7 +28,7 @@ pub mod regex;
pub mod select;
pub mod simple_extract;
pub mod urlencoding;
pub mod vrl;
pub mod vrl_processor;

use std::str::FromStr;

@@ -47,6 +47,7 @@ use letter::LetterProcessor;
use regex::RegexProcessor;
use snafu::{OptionExt, ResultExt};
use urlencoding::UrlEncodingProcessor;
use vrl::value::Value as VrlValue;

use crate::error::{
Error, FailedParseFieldFromStringSnafu, FieldMustBeTypeSnafu, InvalidFieldRenameSnafu,
@@ -57,8 +58,7 @@ use crate::etl::field::{Field, Fields};
use crate::etl::processor::json_parse::JsonParseProcessor;
use crate::etl::processor::select::SelectProcessor;
use crate::etl::processor::simple_extract::SimpleExtractProcessor;
use crate::etl::processor::vrl::VrlProcessor;
use crate::Value;
use crate::etl::processor::vrl_processor::VrlProcessor;

const FIELD_NAME: &str = "field";
const FIELDS_NAME: &str = "fields";
@@ -123,7 +123,7 @@ pub trait Processor: std::fmt::Debug + Send + Sync + 'static {
fn ignore_missing(&self) -> bool;

/// Execute the processor on a vector which be preprocessed by the pipeline
fn exec_mut(&self, val: Value) -> Result<Value>;
fn exec_mut(&self, val: VrlValue) -> Result<VrlValue>;
}

#[derive(Debug)]
@@ -224,7 +224,7 @@ fn parse_processor(doc: &yaml_rust::Yaml) -> Result<ProcessorKind> {
json_parse::PROCESSOR_JSON_PARSE => {
ProcessorKind::JsonParse(JsonParseProcessor::try_from(value)?)
}
vrl::PROCESSOR_VRL => ProcessorKind::Vrl(VrlProcessor::try_from(value)?),
vrl_processor::PROCESSOR_VRL => ProcessorKind::Vrl(VrlProcessor::try_from(value)?),
select::PROCESSOR_SELECT => ProcessorKind::Select(SelectProcessor::try_from(value)?),
_ => return UnsupportedProcessorSnafu { processor: str_key }.fail(),
};

@@ -18,20 +18,22 @@

use std::collections::BTreeMap;

use ordered_float::NotNan;
use snafu::{OptionExt, ResultExt};
use urlencoding::decode;
use vrl::prelude::Bytes;
use vrl::value::{KeyString, Value as VrlValue};

use crate::error::{
CmcdMissingKeySnafu, CmcdMissingValueSnafu, Error, FailedToParseFloatKeySnafu,
FailedToParseIntKeySnafu, KeyMustBeStringSnafu, ProcessorExpectStringSnafu,
ProcessorMissingFieldSnafu, Result,
FailedToParseIntKeySnafu, FloatIsNanSnafu, KeyMustBeStringSnafu, ProcessorExpectStringSnafu,
ProcessorMissingFieldSnafu, Result, ValueMustBeMapSnafu,
};
use crate::etl::field::Fields;
use crate::etl::processor::{
yaml_bool, yaml_new_field, yaml_new_fields, Processor, FIELDS_NAME, FIELD_NAME,
IGNORE_MISSING_NAME,
};
use crate::etl::value::Value;

pub(crate) const PROCESSOR_CMCD: &str = "cmcd";

@@ -76,42 +78,43 @@ const CMCD_KEYS: [&str; 18] = [
];

/// function to resolve CMCD_KEY_BS | CMCD_KEY_SU
fn bs_su(_: &str, _: &str, _: Option<&str>) -> Result<Value> {
Ok(Value::Boolean(true))
fn bs_su(_: &str, _: &str, _: Option<&str>) -> Result<VrlValue> {
Ok(VrlValue::Boolean(true))
}

/// function to resolve CMCD_KEY_BR | CMCD_KEY_BL | CMCD_KEY_D | CMCD_KEY_DL | CMCD_KEY_MTP | CMCD_KEY_RTP | CMCD_KEY_TB
fn br_tb(s: &str, k: &str, v: Option<&str>) -> Result<Value> {
fn br_tb(s: &str, k: &str, v: Option<&str>) -> Result<VrlValue> {
let v = v.context(CmcdMissingValueSnafu { k, s })?;
let val: i64 = v
.parse()
.context(FailedToParseIntKeySnafu { key: k, value: v })?;
Ok(Value::Int64(val))
Ok(VrlValue::Integer(val))
}

/// function to resolve CMCD_KEY_CID | CMCD_KEY_NRR | CMCD_KEY_OT | CMCD_KEY_SF | CMCD_KEY_SID | CMCD_KEY_V
fn cid_v(s: &str, k: &str, v: Option<&str>) -> Result<Value> {
fn cid_v(s: &str, k: &str, v: Option<&str>) -> Result<VrlValue> {
let v = v.context(CmcdMissingValueSnafu { k, s })?;
Ok(Value::String(v.to_string()))
Ok(VrlValue::Bytes(Bytes::from(v.to_string())))
}

/// function to resolve CMCD_KEY_NOR
fn nor(s: &str, k: &str, v: Option<&str>) -> Result<Value> {
fn nor(s: &str, k: &str, v: Option<&str>) -> Result<VrlValue> {
let v = v.context(CmcdMissingValueSnafu { k, s })?;
let val = match decode(v) {
Ok(val) => val.to_string(),
Err(_) => v.to_string(),
};
Ok(Value::String(val))
Ok(VrlValue::Bytes(Bytes::from(val)))
}

/// function to resolve CMCD_KEY_PR
fn pr(s: &str, k: &str, v: Option<&str>) -> Result<Value> {
fn pr(s: &str, k: &str, v: Option<&str>) -> Result<VrlValue> {
let v = v.context(CmcdMissingValueSnafu { k, s })?;
let val: f64 = v
.parse()
.context(FailedToParseFloatKeySnafu { key: k, value: v })?;
Ok(Value::Float64(val))
let val = NotNan::new(val).context(FloatIsNanSnafu)?;
Ok(VrlValue::Float(val))
}

/// Common Media Client Data Specification:
@@ -156,11 +159,11 @@ pub struct CmcdProcessor {
}

impl CmcdProcessor {
fn generate_key(prefix: &str, key: &str) -> String {
format!("{}_{}", prefix, key)
fn generate_key(prefix: &str, key: &str) -> KeyString {
KeyString::from(format!("{}_{}", prefix, key))
}

fn parse(&self, name: &str, value: &str) -> Result<BTreeMap<String, Value>> {
fn parse(&self, name: &str, value: &str) -> Result<BTreeMap<KeyString, VrlValue>> {
let mut working_set = BTreeMap::new();

let parts = value.split(',');
@@ -250,16 +253,18 @@ impl Processor for CmcdProcessor {
self.ignore_missing
}

fn exec_mut(&self, mut val: Value) -> Result<Value> {
fn exec_mut(&self, mut val: VrlValue) -> Result<VrlValue> {
for field in self.fields.iter() {
let name = field.input_field();

let val = val.as_object_mut().context(ValueMustBeMapSnafu)?;
match val.get(name) {
Some(Value::String(s)) => {
let results = self.parse(field.target_or_input_field(), s)?;
val.extend(results.into())?;
Some(VrlValue::Bytes(s)) => {
let s = String::from_utf8_lossy(s);
let results = self.parse(field.target_or_input_field(), &s)?;

val.extend(results);
}
Some(Value::Null) | None => {
Some(VrlValue::Null) | None => {
if !self.ignore_missing {
return ProcessorMissingFieldSnafu {
processor: self.kind().to_string(),
@@ -288,7 +293,6 @@ mod tests {

use super::*;
use crate::etl::field::{Field, Fields};
use crate::etl::value::Value;

#[test]
fn test_cmcd() {
@@ -297,23 +301,23 @@ mod tests {
"sid%3D%226e2fb550-c457-11e9-bb97-0800200c9a66%22",
vec![(
"prefix_sid",
Value::String("\"6e2fb550-c457-11e9-bb97-0800200c9a66\"".into()),
VrlValue::Bytes(Bytes::from("\"6e2fb550-c457-11e9-bb97-0800200c9a66\"")),
)],
),
(
"br%3D3200%2Cbs%2Cd%3D4004%2Cmtp%3D25400%2Cot%3Dv%2Crtp%3D15000%2Csid%3D%226e2fb550-c457-11e9-bb97-0800200c9a66%22%2Ctb%3D6000",
vec![
("prefix_bs", Value::Boolean(true)),
("prefix_ot", Value::String("v".into())),
("prefix_rtp", Value::Int64(15000)),
("prefix_br", Value::Int64(3200)),
("prefix_tb", Value::Int64(6000)),
("prefix_d", Value::Int64(4004)),
("prefix_bs", VrlValue::Boolean(true)),
("prefix_ot", VrlValue::Bytes(Bytes::from("v"))),
("prefix_rtp", VrlValue::Integer(15000)),
("prefix_br", VrlValue::Integer(3200)),
("prefix_tb", VrlValue::Integer(6000)),
("prefix_d", VrlValue::Integer(4004)),
(
"prefix_sid",
Value::String("\"6e2fb550-c457-11e9-bb97-0800200c9a66\"".into()),
VrlValue::Bytes(Bytes::from("\"6e2fb550-c457-11e9-bb97-0800200c9a66\"")),
),
("prefix_mtp", Value::Int64(25400)),
("prefix_mtp", VrlValue::Integer(25400)),
],
),
(
@@ -322,16 +326,16 @@ mod tests {
vec![
(
"prefix_sid",
Value::String("\"6e2fb550-c457-11e9-bb97-0800200c9a66\"".into()),
VrlValue::Bytes(Bytes::from("\"6e2fb550-c457-11e9-bb97-0800200c9a66\"")),
),
("prefix_rtp", Value::Int64(15000)),
("prefix_rtp", VrlValue::Integer(15000)),
],
),
(
"bs%2Csu",
vec![
("prefix_su", Value::Boolean(true)),
("prefix_bs", Value::Boolean(true)),
("prefix_su", VrlValue::Boolean(true)),
("prefix_bs", VrlValue::Boolean(true)),
],
),
(
@@ -346,7 +350,7 @@ mod tests {
// "prefix_com.examplemyStringKey",
// Value::String("\"myStringValue\"".into()),
// ),
("prefix_d", Value::Int64(4004)),
("prefix_d", VrlValue::Integer(4004)),
],
),
(
@@ -354,11 +358,11 @@ mod tests {
vec![
(
"prefix_sid",
Value::String("\"6e2fb550-c457-11e9-bb97-0800200c9a66\"".into()),
VrlValue::Bytes(Bytes::from("\"6e2fb550-c457-11e9-bb97-0800200c9a66\"")),
),
(
"prefix_nor",
Value::String("\"../300kbps/segment35.m4v\"".into()),
VrlValue::Bytes(Bytes::from("\"../300kbps/segment35.m4v\"")),

),
],
@@ -366,56 +370,56 @@ mod tests {
(
"nrr%3D%2212323-48763%22%2Csid%3D%226e2fb550-c457-11e9-bb97-0800200c9a66%22",
vec![
("prefix_nrr", Value::String("\"12323-48763\"".into())),
("prefix_nrr", VrlValue::Bytes(Bytes::from("\"12323-48763\""))),
(
"prefix_sid",
Value::String("\"6e2fb550-c457-11e9-bb97-0800200c9a66\"".into()),
VrlValue::Bytes(Bytes::from("\"6e2fb550-c457-11e9-bb97-0800200c9a66\"")),
),
],
),
(
"nor%3D%22..%252F300kbps%252Ftrack.m4v%22%2Cnrr%3D%2212323-48763%22%2Csid%3D%226e2fb550-c457-11e9-bb97-0800200c9a66%22",
vec![
("prefix_nrr", Value::String("\"12323-48763\"".into())),
("prefix_nrr", VrlValue::Bytes(Bytes::from("\"12323-48763\""))),
(
"prefix_sid",
Value::String("\"6e2fb550-c457-11e9-bb97-0800200c9a66\"".into()),
VrlValue::Bytes(Bytes::from("\"6e2fb550-c457-11e9-bb97-0800200c9a66\"")),
),
(
"prefix_nor",
Value::String("\"../300kbps/track.m4v\"".into()),
VrlValue::Bytes(Bytes::from("\"../300kbps/track.m4v\"")),
),
],
),
(
"bl%3D21300%2Cbr%3D3200%2Cbs%2Ccid%3D%22faec5fc2-ac30-11eabb37-0242ac130002%22%2Cd%3D4004%2Cdl%3D18500%2Cmtp%3D48100%2Cnor%3D%22..%252F300kbps%252Ftrack.m4v%22%2Cnrr%3D%2212323-48763%22%2Cot%3Dv%2Cpr%3D1.08%2Crtp%3D12000%2Csf%3Dd%2Csid%3D%226e2fb550-c457-11e9-bb97-0800200c9a66%22%2Cst%3Dv%2Csu%2Ctb%3D6000",
vec![
("prefix_bl", Value::Int64(21300)),
("prefix_bs", Value::Boolean(true)),
("prefix_st", Value::String("v".into())),
("prefix_ot", Value::String("v".into())),
("prefix_bl", VrlValue::Integer(21300)),
("prefix_bs", VrlValue::Boolean(true)),
("prefix_st", VrlValue::Bytes(Bytes::from("v"))),
("prefix_ot", VrlValue::Bytes(Bytes::from("v"))),
(
"prefix_sid",
Value::String("\"6e2fb550-c457-11e9-bb97-0800200c9a66\"".into()),
VrlValue::Bytes(Bytes::from("\"6e2fb550-c457-11e9-bb97-0800200c9a66\"")),
),
("prefix_tb", Value::Int64(6000)),
("prefix_d", Value::Int64(4004)),
("prefix_tb", VrlValue::Integer(6000)),
("prefix_d", VrlValue::Integer(4004)),
(
"prefix_cid",
Value::String("\"faec5fc2-ac30-11eabb37-0242ac130002\"".into()),
VrlValue::Bytes(Bytes::from("\"faec5fc2-ac30-11eabb37-0242ac130002\"")),
),
("prefix_mtp", Value::Int64(48100)),
("prefix_rtp", Value::Int64(12000)),
("prefix_mtp", VrlValue::Integer(48100)),
("prefix_rtp", VrlValue::Integer(12000)),
(
"prefix_nor",
Value::String("\"../300kbps/track.m4v\"".into()),
VrlValue::Bytes(Bytes::from("\"../300kbps/track.m4v\"")),
),
("prefix_sf", Value::String("d".into())),
("prefix_br", Value::Int64(3200)),
("prefix_nrr", Value::String("\"12323-48763\"".into())),
("prefix_pr", Value::Float64(1.08)),
("prefix_su", Value::Boolean(true)),
("prefix_dl", Value::Int64(18500)),
("prefix_sf", VrlValue::Bytes(Bytes::from("d"))),
("prefix_br", VrlValue::Integer(3200)),
("prefix_nrr", VrlValue::Bytes(Bytes::from("\"12323-48763\""))),
("prefix_pr", VrlValue::Float(NotNan::new(1.08).unwrap())),
("prefix_su", VrlValue::Boolean(true)),
("prefix_dl", VrlValue::Integer(18500)),
],
),
];
@@ -432,8 +436,8 @@ mod tests {

let expected = vec
.into_iter()
.map(|(k, v)| (k.to_string(), v))
.collect::<BTreeMap<String, Value>>();
.map(|(k, v)| (KeyString::from(k.to_string()), v))
.collect::<BTreeMap<KeyString, VrlValue>>();

let actual = processor.parse("prefix", &decoded).unwrap();
assert_eq!(actual, expected);

@@ -20,17 +20,19 @@ use csv::{ReaderBuilder, Trim};
use itertools::EitherOrBoth::{Both, Left, Right};
use itertools::Itertools;
use snafu::{OptionExt, ResultExt};
use vrl::prelude::Bytes;
use vrl::value::{KeyString, Value as VrlValue};

use crate::error::{
CsvNoRecordSnafu, CsvQuoteNameSnafu, CsvReadSnafu, CsvSeparatorNameSnafu, Error,
KeyMustBeStringSnafu, ProcessorExpectStringSnafu, ProcessorMissingFieldSnafu, Result,
ValueMustBeMapSnafu,
};
use crate::etl::field::Fields;
use crate::etl::processor::{
yaml_bool, yaml_new_field, yaml_new_fields, yaml_string, Processor, FIELDS_NAME, FIELD_NAME,
IGNORE_MISSING_NAME,
};
use crate::etl::value::Value;

pub(crate) const PROCESSOR_CSV: &str = "csv";

@@ -60,8 +62,8 @@ pub struct CsvProcessor {
|
||||
|
||||
impl CsvProcessor {
|
||||
// process the csv format string to a map with target_fields as keys
|
||||
fn process(&self, val: &str) -> Result<BTreeMap<String, Value>> {
|
||||
let mut reader = self.reader.from_reader(val.as_bytes());
|
||||
fn process(&self, val: &[u8]) -> Result<BTreeMap<KeyString, VrlValue>> {
|
||||
let mut reader = self.reader.from_reader(val);
|
||||
|
||||
if let Some(result) = reader.records().next() {
|
||||
let record: csv::StringRecord = result.context(CsvReadSnafu)?;
|
||||
@@ -71,17 +73,18 @@ impl CsvProcessor {
|
||||
.iter()
|
||||
.zip_longest(record.iter())
|
||||
.filter_map(|zipped| match zipped {
|
||||
Both(target_field, val) => {
|
||||
Some((target_field.clone(), Value::String(val.into())))
|
||||
}
|
||||
Both(target_field, val) => Some((
|
||||
KeyString::from(target_field.clone()),
|
||||
VrlValue::Bytes(Bytes::from(val.to_string())),
|
||||
)),
|
||||
// if target fields are more than extracted fields, fill the rest with empty value
|
||||
Left(target_field) => {
|
||||
let value = self
|
||||
.empty_value
|
||||
.as_ref()
|
||||
.map(|s| Value::String(s.clone()))
|
||||
.unwrap_or(Value::Null);
|
||||
Some((target_field.clone(), value))
|
||||
.map(|s| VrlValue::Bytes(Bytes::from(s.clone())))
|
||||
.unwrap_or(VrlValue::Null);
|
||||
Some((KeyString::from(target_field.clone()), value))
|
||||
}
|
||||
// if extracted fields are more than target fields, ignore the rest
|
||||
Right(_) => None,
|
||||
@@ -190,16 +193,18 @@ impl Processor for CsvProcessor {
|
||||
self.ignore_missing
|
||||
}
|
||||
|
||||
fn exec_mut(&self, mut val: Value) -> Result<Value> {
|
||||
fn exec_mut(&self, mut val: VrlValue) -> Result<VrlValue> {
|
||||
for field in self.fields.iter() {
|
||||
let name = field.input_field();
|
||||
|
||||
let val = val.as_object_mut().context(ValueMustBeMapSnafu)?;
|
||||
|
||||
match val.get(name) {
|
||||
Some(Value::String(v)) => {
|
||||
Some(VrlValue::Bytes(v)) => {
|
||||
let results = self.process(v)?;
|
||||
val.extend(results.into())?;
|
||||
val.extend(results);
|
||||
}
|
||||
Some(Value::Null) | None => {
|
||||
Some(VrlValue::Null) | None => {
|
||||
if !self.ignore_missing {
|
||||
return ProcessorMissingFieldSnafu {
|
||||
processor: self.kind().to_string(),
|
||||
@@ -238,11 +243,11 @@ mod tests {
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let result = processor.process("1,2").unwrap();
|
||||
let result = processor.process(b"1,2").unwrap();
|
||||
|
||||
let values: BTreeMap<String, Value> = [
|
||||
("a".into(), Value::String("1".into())),
|
||||
("b".into(), Value::String("2".into())),
|
||||
let values: BTreeMap<KeyString, VrlValue> = [
|
||||
(KeyString::from("a"), VrlValue::Bytes(Bytes::from("1"))),
|
||||
(KeyString::from("b"), VrlValue::Bytes(Bytes::from("2"))),
|
||||
]
|
||||
.into_iter()
|
||||
.collect();
|
||||
@@ -264,12 +269,12 @@ mod tests {
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let result = processor.process("1,2").unwrap();
|
||||
let result = processor.process(b"1,2").unwrap();
|
||||
|
||||
let values: BTreeMap<String, Value> = [
|
||||
("a".into(), Value::String("1".into())),
|
||||
("b".into(), Value::String("2".into())),
|
||||
("c".into(), Value::Null),
|
||||
let values: BTreeMap<KeyString, VrlValue> = [
|
||||
(KeyString::from("a"), VrlValue::Bytes(Bytes::from("1"))),
|
||||
(KeyString::from("b"), VrlValue::Bytes(Bytes::from("2"))),
|
||||
(KeyString::from("c"), VrlValue::Null),
|
||||
]
|
||||
.into_iter()
|
||||
.collect();
|
||||
@@ -289,12 +294,15 @@ mod tests {
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let result = processor.process("1,2").unwrap();
|
||||
let result = processor.process(b"1,2").unwrap();
|
||||
|
||||
let values: BTreeMap<String, Value> = [
|
||||
("a".into(), Value::String("1".into())),
|
||||
("b".into(), Value::String("2".into())),
|
||||
("c".into(), Value::String("default".into())),
|
||||
let values: BTreeMap<KeyString, VrlValue> = [
|
||||
(KeyString::from("a"), VrlValue::Bytes(Bytes::from("1"))),
|
||||
(KeyString::from("b"), VrlValue::Bytes(Bytes::from("2"))),
|
||||
(
|
||||
KeyString::from("c"),
|
||||
VrlValue::Bytes(Bytes::from("default")),
|
||||
),
|
||||
]
|
||||
.into_iter()
|
||||
.collect();
|
||||
@@ -315,11 +323,11 @@ mod tests {
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let result = processor.process("1,2").unwrap();
|
||||
let result = processor.process(b"1,2").unwrap();
|
||||
|
||||
let values: BTreeMap<String, Value> = [
|
||||
("a".into(), Value::String("1".into())),
|
||||
("b".into(), Value::String("2".into())),
|
||||
let values: BTreeMap<KeyString, VrlValue> = [
|
||||
(KeyString::from("a"), VrlValue::Bytes(Bytes::from("1"))),
|
||||
(KeyString::from("b"), VrlValue::Bytes(Bytes::from("2"))),
|
||||
]
|
||||
.into_iter()
|
||||
.collect();
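For context on the csv hunks above: process() now takes raw bytes and pairs target_fields with the parsed columns via zip_longest, emitting KeyString keys and VrlValue::Bytes values. A minimal sketch of that pairing, assuming the itertools and vrl crates used by this change; pair_columns is an illustrative name, not part of the PR:

use std::collections::BTreeMap;

use itertools::EitherOrBoth::{Both, Left, Right};
use itertools::Itertools;
use vrl::prelude::Bytes;
use vrl::value::{KeyString, Value as VrlValue};

fn pair_columns(
    target_fields: &[String],
    record: &[&str],
    empty_value: Option<&str>,
) -> BTreeMap<KeyString, VrlValue> {
    target_fields
        .iter()
        .zip_longest(record.iter())
        .filter_map(|zipped| match zipped {
            // a target field matched a CSV column
            Both(field, col) => Some((
                KeyString::from(field.clone()),
                VrlValue::Bytes(Bytes::from(col.to_string())),
            )),
            // more target fields than columns: fill with the configured default
            Left(field) => Some((
                KeyString::from(field.clone()),
                empty_value
                    .map(|s| VrlValue::Bytes(Bytes::from(s.to_string())))
                    .unwrap_or(VrlValue::Null),
            )),
            // more columns than target fields: drop the extras
            Right(_) => None,
        })
        .collect()
}

fn main() {
    let fields = ["a".to_string(), "b".to_string(), "c".to_string()];
    let map = pair_columns(&fields, &["1", "2"], Some("default"));
    assert_eq!(map.len(), 3);
    assert_eq!(
        map.get(&KeyString::from("c")),
        Some(&VrlValue::Bytes(Bytes::from("default")))
    );
}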
@@ -14,22 +14,22 @@
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use chrono::{DateTime, NaiveDateTime};
|
||||
use chrono::{DateTime, NaiveDateTime, Utc};
|
||||
use chrono_tz::Tz;
|
||||
use lazy_static::lazy_static;
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
use vrl::value::{KeyString, Value as VrlValue};
|
||||
|
||||
use crate::error::{
|
||||
DateFailedToGetLocalTimezoneSnafu, DateFailedToGetTimestampSnafu, DateParseSnafu,
|
||||
DateParseTimezoneSnafu, Error, KeyMustBeStringSnafu, ProcessorExpectStringSnafu,
|
||||
ProcessorFailedToParseStringSnafu, ProcessorMissingFieldSnafu, Result,
|
||||
DateFailedToGetLocalTimezoneSnafu, DateParseSnafu, DateParseTimezoneSnafu, Error,
|
||||
KeyMustBeStringSnafu, ProcessorExpectStringSnafu, ProcessorFailedToParseStringSnafu,
|
||||
ProcessorMissingFieldSnafu, Result, ValueMustBeMapSnafu,
|
||||
};
|
||||
use crate::etl::field::Fields;
|
||||
use crate::etl::processor::{
|
||||
yaml_bool, yaml_new_field, yaml_new_fields, yaml_string, yaml_strings, Processor, FIELDS_NAME,
|
||||
FIELD_NAME, IGNORE_MISSING_NAME,
|
||||
};
|
||||
use crate::etl::value::{Timestamp, Value};
|
||||
|
||||
pub(crate) const PROCESSOR_DATE: &str = "date";
|
||||
|
||||
@@ -162,7 +162,7 @@ pub struct DateProcessor {
|
||||
}
|
||||
|
||||
impl DateProcessor {
|
||||
fn parse(&self, val: &str) -> Result<Timestamp> {
|
||||
fn parse(&self, val: &str) -> Result<DateTime<Utc>> {
|
||||
let mut tz = Tz::UTC;
|
||||
if let Some(timezone) = &self.timezone {
|
||||
tz = timezone.parse::<Tz>().context(DateParseTimezoneSnafu {
|
||||
@@ -171,8 +171,8 @@ impl DateProcessor {
|
||||
}
|
||||
|
||||
for fmt in self.formats.iter() {
|
||||
if let Ok(ns) = try_parse(val, fmt, tz) {
|
||||
return Ok(Timestamp::Nanosecond(ns));
|
||||
if let Ok(utc_ts) = try_parse(val, fmt, tz) {
|
||||
return Ok(utc_ts);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -193,16 +193,19 @@ impl Processor for DateProcessor {
|
||||
self.ignore_missing
|
||||
}
|
||||
|
||||
fn exec_mut(&self, mut val: Value) -> Result<Value> {
|
||||
fn exec_mut(&self, mut val: VrlValue) -> Result<VrlValue> {
|
||||
for field in self.fields.iter() {
|
||||
let index = field.input_field();
|
||||
|
||||
let val = val.as_object_mut().context(ValueMustBeMapSnafu)?;
|
||||
|
||||
match val.get(index) {
|
||||
Some(Value::String(s)) => {
|
||||
let timestamp = self.parse(s)?;
|
||||
Some(VrlValue::Bytes(s)) => {
|
||||
let timestamp = self.parse(String::from_utf8_lossy(s).as_ref())?;
|
||||
let output_key = field.target_or_input_field();
|
||||
val.insert(output_key.to_string(), Value::Timestamp(timestamp))?;
|
||||
val.insert(KeyString::from(output_key), VrlValue::Timestamp(timestamp));
|
||||
}
|
||||
Some(Value::Null) | None => {
|
||||
Some(VrlValue::Null) | None => {
|
||||
if !self.ignore_missing {
|
||||
return ProcessorMissingFieldSnafu {
|
||||
processor: self.kind().to_string(),
|
||||
@@ -224,21 +227,19 @@ impl Processor for DateProcessor {
|
||||
}
|
||||
}
|
||||
|
||||
/// try to parse val with timezone first, if failed, parse without timezone
|
||||
fn try_parse(val: &str, fmt: &str, tz: Tz) -> Result<i64> {
|
||||
// parse the datetime with timezone info
|
||||
// if failed, try to parse using naive date time and add tz info
|
||||
// finally convert the datetime to utc
|
||||
fn try_parse(val: &str, fmt: &str, tz: Tz) -> Result<DateTime<Utc>> {
|
||||
if let Ok(dt) = DateTime::parse_from_str(val, fmt) {
|
||||
Ok(dt
|
||||
.timestamp_nanos_opt()
|
||||
.context(DateFailedToGetTimestampSnafu)?)
|
||||
Ok(dt.to_utc())
|
||||
} else {
|
||||
let dt = NaiveDateTime::parse_from_str(val, fmt)
|
||||
.context(DateParseSnafu { value: val })?
|
||||
.and_local_timezone(tz)
|
||||
.single()
|
||||
.context(DateFailedToGetLocalTimezoneSnafu)?;
|
||||
Ok(dt
|
||||
.timestamp_nanos_opt()
|
||||
.context(DateFailedToGetTimestampSnafu)?)
|
||||
Ok(dt.to_utc())
|
||||
}
|
||||
}
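The date hunks above change parse() to return a chrono DateTime<Utc>: an offset-aware parse is tried first, then a naive parse that attaches the configured timezone. A minimal sketch of that fallback, assuming the chrono and chrono-tz crates; the format string in the usage is illustrative only:

use chrono::{DateTime, NaiveDateTime, Utc};
use chrono_tz::Tz;

fn try_parse(val: &str, fmt: &str, tz: Tz) -> Option<DateTime<Utc>> {
    // First, treat the input as carrying its own offset...
    if let Ok(dt) = DateTime::parse_from_str(val, fmt) {
        return Some(dt.to_utc());
    }
    // ...otherwise parse it as a naive timestamp and attach the configured zone.
    NaiveDateTime::parse_from_str(val, fmt)
        .ok()?
        .and_local_timezone(tz)
        .single()
        .map(|dt| dt.to_utc())
}

fn main() {
    let utc = try_parse("2024-05-01 12:00:00", "%Y-%m-%d %H:%M:%S", Tz::UTC).unwrap();
    assert_eq!(utc, DateTime::parse_from_rfc3339("2024-05-01T12:00:00Z").unwrap());
}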
@@ -21,15 +21,17 @@
|
||||
use once_cell::sync::Lazy;
|
||||
use regex::Regex;
|
||||
use snafu::OptionExt;
|
||||
use vrl::prelude::Bytes;
|
||||
use vrl::value::{KeyString, Value as VrlValue};
|
||||
|
||||
use crate::error::{
|
||||
Error, KeyMustBeStringSnafu, ProcessorExpectStringSnafu, ProcessorMissingFieldSnafu, Result,
|
||||
ValueMustBeMapSnafu,
|
||||
};
|
||||
use crate::etl::field::Fields;
|
||||
use crate::etl::processor::{
|
||||
yaml_bool, yaml_new_field, yaml_new_fields, FIELDS_NAME, FIELD_NAME, IGNORE_MISSING_NAME,
|
||||
};
|
||||
use crate::etl::value::Value;
|
||||
|
||||
pub(crate) const PROCESSOR_DECOLORIZE: &str = "decolorize";
|
||||
|
||||
@@ -43,13 +45,15 @@ pub struct DecolorizeProcessor {
|
||||
}
|
||||
|
||||
impl DecolorizeProcessor {
|
||||
fn process_string(&self, val: &str) -> Result<Value> {
|
||||
Ok(Value::String(RE.replace_all(val, "").into_owned()))
|
||||
fn process_string(&self, val: &str) -> Result<VrlValue> {
|
||||
Ok(VrlValue::Bytes(Bytes::from(
|
||||
RE.replace_all(val, "").to_string(),
|
||||
)))
|
||||
}
|
||||
|
||||
fn process(&self, val: &Value) -> Result<Value> {
|
||||
fn process(&self, val: &VrlValue) -> Result<VrlValue> {
|
||||
match val {
|
||||
Value::String(val) => self.process_string(val),
|
||||
VrlValue::Bytes(val) => self.process_string(String::from_utf8_lossy(val).as_ref()),
|
||||
_ => ProcessorExpectStringSnafu {
|
||||
processor: PROCESSOR_DECOLORIZE,
|
||||
v: val.clone(),
|
||||
@@ -101,11 +105,12 @@ impl crate::etl::processor::Processor for DecolorizeProcessor {
|
||||
self.ignore_missing
|
||||
}
|
||||
|
||||
fn exec_mut(&self, mut val: Value) -> Result<Value> {
|
||||
fn exec_mut(&self, mut val: VrlValue) -> Result<VrlValue> {
|
||||
for field in self.fields.iter() {
|
||||
let index = field.input_field();
|
||||
let val = val.as_object_mut().context(ValueMustBeMapSnafu)?;
|
||||
match val.get(index) {
|
||||
Some(Value::Null) | None => {
|
||||
Some(VrlValue::Null) | None => {
|
||||
if !self.ignore_missing {
|
||||
return ProcessorMissingFieldSnafu {
|
||||
processor: self.kind(),
|
||||
@@ -117,7 +122,7 @@ impl crate::etl::processor::Processor for DecolorizeProcessor {
|
||||
Some(v) => {
|
||||
let result = self.process(v)?;
|
||||
let output_index = field.target_or_input_field();
|
||||
val.insert(output_index.to_string(), result)?;
|
||||
val.insert(KeyString::from(output_index), result);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -136,16 +141,19 @@ mod tests {
|
||||
ignore_missing: false,
|
||||
};
|
||||
|
||||
let val = Value::String("\x1b[32mGreen\x1b[0m".to_string());
|
||||
let val = VrlValue::Bytes(Bytes::from("\x1b[32mGreen\x1b[0m".to_string()));
|
||||
let result = processor.process(&val).unwrap();
|
||||
assert_eq!(result, Value::String("Green".to_string()));
|
||||
assert_eq!(result, VrlValue::Bytes(Bytes::from("Green".to_string())));
|
||||
|
||||
let val = Value::String("Plain text".to_string());
|
||||
let val = VrlValue::Bytes(Bytes::from("Plain text".to_string()));
|
||||
let result = processor.process(&val).unwrap();
|
||||
assert_eq!(result, Value::String("Plain text".to_string()));
|
||||
assert_eq!(
|
||||
result,
|
||||
VrlValue::Bytes(Bytes::from("Plain text".to_string()))
|
||||
);
|
||||
|
||||
let val = Value::String("\x1b[46mfoo\x1b[0m bar".to_string());
|
||||
let val = VrlValue::Bytes(Bytes::from("\x1b[46mfoo\x1b[0m bar".to_string()));
|
||||
let result = processor.process(&val).unwrap();
|
||||
assert_eq!(result, Value::String("foo bar".to_string()));
|
||||
assert_eq!(result, VrlValue::Bytes(Bytes::from("foo bar".to_string())));
|
||||
}
|
||||
}
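The decolorize hunks above keep the regex-based ANSI stripping but re-wrap the result as VrlValue::Bytes. A minimal sketch, assuming the regex and vrl crates; the escape-sequence pattern here is illustrative and may differ from the processor's own RE:

use regex::Regex;
use vrl::prelude::Bytes;
use vrl::value::Value as VrlValue;

fn decolorize(val: &str) -> VrlValue {
    // \x1b[ ... m -- colour/style escape sequences
    let re = Regex::new(r"\x1b\[[0-9;]*m").unwrap();
    VrlValue::Bytes(Bytes::from(re.replace_all(val, "").to_string()))
}

fn main() {
    assert_eq!(
        decolorize("\x1b[32mGreen\x1b[0m"),
        VrlValue::Bytes(Bytes::from("Green"))
    );
}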
@@ -23,16 +23,17 @@ use std::borrow::Cow;
|
||||
|
||||
use regex::Regex;
|
||||
use snafu::OptionExt;
|
||||
use vrl::prelude::Bytes;
|
||||
use vrl::value::{KeyString, Value as VrlValue};
|
||||
|
||||
use crate::error::{
|
||||
DigestPatternInvalidSnafu, Error, KeyMustBeStringSnafu, ProcessorExpectStringSnafu,
|
||||
ProcessorMissingFieldSnafu, Result,
|
||||
ProcessorMissingFieldSnafu, Result, ValueMustBeMapSnafu,
|
||||
};
|
||||
use crate::etl::field::Fields;
|
||||
use crate::etl::processor::{
|
||||
yaml_bool, yaml_new_field, yaml_new_fields, FIELDS_NAME, FIELD_NAME, IGNORE_MISSING_NAME,
|
||||
};
|
||||
use crate::etl::value::Value;
|
||||
|
||||
pub(crate) const PROCESSOR_DIGEST: &str = "digest";
|
||||
|
||||
@@ -100,7 +101,7 @@ impl DigestProcessor {
|
||||
re.replace_all(val, "").to_string()
|
||||
}
|
||||
|
||||
fn process_string(&self, val: &str) -> Result<Value> {
|
||||
fn process_string(&self, val: &str) -> Result<VrlValue> {
|
||||
let mut input = Cow::from(val);
|
||||
for pattern in &self.patterns {
|
||||
if let Cow::Owned(new_string) = pattern.replace_all(&input, "") {
|
||||
@@ -108,12 +109,12 @@ impl DigestProcessor {
|
||||
}
|
||||
}
|
||||
|
||||
Ok(Value::String(input.into_owned()))
|
||||
Ok(VrlValue::Bytes(Bytes::from(input.to_string())))
|
||||
}
|
||||
|
||||
fn process(&self, val: &Value) -> Result<Value> {
|
||||
fn process(&self, val: &VrlValue) -> Result<VrlValue> {
|
||||
match val {
|
||||
Value::String(val) => self.process_string(val),
|
||||
VrlValue::Bytes(val) => self.process_string(String::from_utf8_lossy(val).as_ref()),
|
||||
_ => ProcessorExpectStringSnafu {
|
||||
processor: PROCESSOR_DIGEST,
|
||||
v: val.clone(),
|
||||
@@ -200,11 +201,12 @@ impl crate::etl::processor::Processor for DigestProcessor {
|
||||
self.ignore_missing
|
||||
}
|
||||
|
||||
fn exec_mut(&self, mut val: Value) -> Result<Value> {
|
||||
fn exec_mut(&self, mut val: VrlValue) -> Result<VrlValue> {
|
||||
for field in self.fields.iter() {
|
||||
let index = field.input_field();
|
||||
let val = val.as_object_mut().context(ValueMustBeMapSnafu)?;
|
||||
match val.get(index) {
|
||||
Some(Value::Null) | None => {
|
||||
Some(VrlValue::Null) | None => {
|
||||
if !self.ignore_missing {
|
||||
return ProcessorMissingFieldSnafu {
|
||||
processor: self.kind(),
|
||||
@@ -216,7 +218,7 @@ impl crate::etl::processor::Processor for DigestProcessor {
|
||||
Some(v) => {
|
||||
let result = self.process(v)?;
|
||||
let output_index = field.target_or_input_field();
|
||||
val.insert(output_index.to_string(), result)?;
|
||||
val.insert(KeyString::from(output_index), result);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -237,24 +239,31 @@ mod tests {
|
||||
patterns: vec![PresetPattern::Ip.regex()],
|
||||
};
|
||||
|
||||
let input = Value::String("192.168.1.1".to_string());
|
||||
let input = VrlValue::Bytes(Bytes::from("192.168.1.1".to_string()));
|
||||
let result = processor.process(&input).unwrap();
|
||||
assert_eq!(result, Value::String("".to_string()));
|
||||
let input = Value::String("192.168.1.1:8080".to_string());
|
||||
assert_eq!(result, VrlValue::Bytes(Bytes::from("".to_string())));
|
||||
let input = VrlValue::Bytes(Bytes::from("192.168.1.1:8080".to_string()));
|
||||
let result = processor.process(&input).unwrap();
|
||||
assert_eq!(result, Value::String("".to_string()));
|
||||
assert_eq!(result, VrlValue::Bytes(Bytes::from("".to_string())));
|
||||
|
||||
let input = Value::String("[2001:0db8:85a3:0000:0000:8a2e:0370:7334]".to_string());
|
||||
let input = VrlValue::Bytes(Bytes::from(
|
||||
"[2001:0db8:85a3:0000:0000:8a2e:0370:7334]".to_string(),
|
||||
));
|
||||
let result = processor.process(&input).unwrap();
|
||||
assert_eq!(result, Value::String("".to_string()));
|
||||
assert_eq!(result, VrlValue::Bytes(Bytes::from("".to_string())));
|
||||
|
||||
let input = Value::String("[2001:0db8:85a3:0000:0000:8a2e:0370:7334]:8080".to_string());
|
||||
let input = VrlValue::Bytes(Bytes::from(
|
||||
"[2001:0db8:85a3:0000:0000:8a2e:0370:7334]:8080".to_string(),
|
||||
));
|
||||
let result = processor.process(&input).unwrap();
|
||||
assert_eq!(result, Value::String("".to_string()));
|
||||
assert_eq!(result, VrlValue::Bytes(Bytes::from("".to_string())));
|
||||
|
||||
let input = Value::String("not an ip".to_string());
|
||||
let input = VrlValue::Bytes(Bytes::from("not an ip".to_string()));
|
||||
let result = processor.process(&input).unwrap();
|
||||
assert_eq!(result, Value::String("not an ip".to_string()));
|
||||
assert_eq!(
|
||||
result,
|
||||
VrlValue::Bytes(Bytes::from("not an ip".to_string()))
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -265,29 +274,40 @@ mod tests {
|
||||
patterns: vec![PresetPattern::Uuid.regex()],
|
||||
};
|
||||
// UUID v4
|
||||
let input = Value::String("123e4567-e89b-12d3-a456-426614174000".to_string());
|
||||
let input = VrlValue::Bytes(Bytes::from(
|
||||
"123e4567-e89b-12d3-a456-426614174000".to_string(),
|
||||
));
|
||||
let result = processor.process(&input).unwrap();
|
||||
assert_eq!(result, Value::String("".to_string()));
|
||||
assert_eq!(result, VrlValue::Bytes(Bytes::from("".to_string())));
|
||||
|
||||
// UUID v1
|
||||
let input = Value::String("6ba7b810-9dad-11d1-80b4-00c04fd430c8".to_string());
|
||||
let input = VrlValue::Bytes(Bytes::from(
|
||||
"6ba7b810-9dad-11d1-80b4-00c04fd430c8".to_string(),
|
||||
));
|
||||
let result = processor.process(&input).unwrap();
|
||||
assert_eq!(result, Value::String("".to_string()));
|
||||
assert_eq!(result, VrlValue::Bytes(Bytes::from("".to_string())));
|
||||
|
||||
// UUID v5
|
||||
let input = Value::String("886313e1-3b8a-5372-9b90-0c9aee199e5d".to_string());
|
||||
let input = VrlValue::Bytes(Bytes::from(
|
||||
"886313e1-3b8a-5372-9b90-0c9aee199e5d".to_string(),
|
||||
));
|
||||
let result = processor.process(&input).unwrap();
|
||||
assert_eq!(result, Value::String("".to_string()));
|
||||
assert_eq!(result, VrlValue::Bytes(Bytes::from("".to_string())));
|
||||
|
||||
// UUID with uppercase letters
|
||||
let input = Value::String("A987FBC9-4BED-3078-CF07-9141BA07C9F3".to_string());
|
||||
let input = VrlValue::Bytes(Bytes::from(
|
||||
"A987FBC9-4BED-3078-CF07-9141BA07C9F3".to_string(),
|
||||
));
|
||||
let result = processor.process(&input).unwrap();
|
||||
assert_eq!(result, Value::String("".to_string()));
|
||||
assert_eq!(result, VrlValue::Bytes(Bytes::from("".to_string())));
|
||||
|
||||
// Negative case
|
||||
let input = Value::String("not a uuid".to_string());
|
||||
let input = VrlValue::Bytes(Bytes::from("not a uuid".to_string()));
|
||||
let result = processor.process(&input).unwrap();
|
||||
assert_eq!(result, Value::String("not a uuid".to_string()));
|
||||
assert_eq!(
|
||||
result,
|
||||
VrlValue::Bytes(Bytes::from("not a uuid".to_string()))
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -299,45 +319,48 @@ mod tests {
|
||||
};
|
||||
|
||||
// Basic brackets
|
||||
let input = Value::String("[content]".to_string());
|
||||
let input = VrlValue::Bytes(Bytes::from("[content]".to_string()));
|
||||
let result = processor.process(&input).unwrap();
|
||||
assert_eq!(result, Value::String("".to_string()));
|
||||
assert_eq!(result, VrlValue::Bytes(Bytes::from("".to_string())));
|
||||
|
||||
let input = Value::String("(content)".to_string());
|
||||
let input = VrlValue::Bytes(Bytes::from("(content)".to_string()));
|
||||
let result = processor.process(&input).unwrap();
|
||||
assert_eq!(result, Value::String("".to_string()));
|
||||
assert_eq!(result, VrlValue::Bytes(Bytes::from("".to_string())));
|
||||
|
||||
// Chinese brackets
|
||||
let input = Value::String("「content」".to_string());
|
||||
let input = VrlValue::Bytes(Bytes::from("「content」".to_string()));
|
||||
let result = processor.process(&input).unwrap();
|
||||
assert_eq!(result, Value::String("".to_string()));
|
||||
assert_eq!(result, VrlValue::Bytes(Bytes::from("".to_string())));
|
||||
|
||||
let input = Value::String("『content』".to_string());
|
||||
let input = VrlValue::Bytes(Bytes::from("『content』".to_string()));
|
||||
let result = processor.process(&input).unwrap();
|
||||
assert_eq!(result, Value::String("".to_string()));
|
||||
assert_eq!(result, VrlValue::Bytes(Bytes::from("".to_string())));
|
||||
|
||||
let input = Value::String("【content】".to_string());
|
||||
let input = VrlValue::Bytes(Bytes::from("【content】".to_string()));
|
||||
let result = processor.process(&input).unwrap();
|
||||
assert_eq!(result, Value::String("".to_string()));
|
||||
assert_eq!(result, VrlValue::Bytes(Bytes::from("".to_string())));
|
||||
|
||||
// Unmatched/unclosed brackets should not match
|
||||
let input = Value::String("[content".to_string());
|
||||
let input = VrlValue::Bytes(Bytes::from("[content".to_string()));
|
||||
let result = processor.process(&input).unwrap();
|
||||
assert_eq!(result, Value::String("[content".to_string()));
|
||||
assert_eq!(result, VrlValue::Bytes(Bytes::from("[content".to_string())));
|
||||
|
||||
let input = Value::String("content]".to_string());
|
||||
let input = VrlValue::Bytes(Bytes::from("content]".to_string()));
|
||||
let result = processor.process(&input).unwrap();
|
||||
assert_eq!(result, Value::String("content]".to_string()));
|
||||
assert_eq!(result, VrlValue::Bytes(Bytes::from("content]".to_string())));
|
||||
|
||||
// Bad case
|
||||
let input = Value::String("[content}".to_string());
|
||||
let input = VrlValue::Bytes(Bytes::from("[content}".to_string()));
|
||||
let result = processor.process(&input).unwrap();
|
||||
assert_eq!(result, Value::String("".to_string()));
|
||||
assert_eq!(result, VrlValue::Bytes(Bytes::from("".to_string())));
|
||||
|
||||
// Negative case
|
||||
let input = Value::String("no brackets".to_string());
|
||||
let input = VrlValue::Bytes(Bytes::from("no brackets".to_string()));
|
||||
let result = processor.process(&input).unwrap();
|
||||
assert_eq!(result, Value::String("no brackets".to_string()));
|
||||
assert_eq!(
|
||||
result,
|
||||
VrlValue::Bytes(Bytes::from("no brackets".to_string()))
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -348,16 +371,19 @@ mod tests {
|
||||
patterns: vec![PresetPattern::Quoted.regex()],
|
||||
};
|
||||
|
||||
let input = Value::String("\"quoted content\"".to_string());
|
||||
let input = VrlValue::Bytes(Bytes::from("\"quoted content\"".to_string()));
|
||||
let result = processor.process(&input).unwrap();
|
||||
assert_eq!(result, Value::String("".to_string()));
|
||||
assert_eq!(result, VrlValue::Bytes(Bytes::from("".to_string())));
|
||||
|
||||
let input = Value::String("no quotes".to_string());
|
||||
let input = VrlValue::Bytes(Bytes::from("no quotes".to_string()));
|
||||
let result = processor.process(&input).unwrap();
|
||||
assert_eq!(result, Value::String("no quotes".to_string()));
|
||||
let input = Value::String("".to_string());
|
||||
assert_eq!(
|
||||
result,
|
||||
VrlValue::Bytes(Bytes::from("no quotes".to_string()))
|
||||
);
|
||||
let input = VrlValue::Bytes(Bytes::from("".to_string()));
|
||||
let result = processor.process(&input).unwrap();
|
||||
assert_eq!(result, Value::String("".to_string()));
|
||||
assert_eq!(result, VrlValue::Bytes(Bytes::from("".to_string())));
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -368,15 +394,18 @@ mod tests {
|
||||
patterns: vec![Regex::new(r"\d+").unwrap()],
|
||||
};
|
||||
|
||||
let input = Value::String("12345".to_string());
|
||||
let input = VrlValue::Bytes(Bytes::from("12345".to_string()));
|
||||
let result = processor.process(&input).unwrap();
|
||||
assert_eq!(result, Value::String("".to_string()));
|
||||
assert_eq!(result, VrlValue::Bytes(Bytes::from("".to_string())));
|
||||
|
||||
let input = Value::String("no digits".to_string());
|
||||
let input = VrlValue::Bytes(Bytes::from("no digits".to_string()));
|
||||
let result = processor.process(&input).unwrap();
|
||||
assert_eq!(result, Value::String("no digits".to_string()));
|
||||
let input = Value::String("".to_string());
|
||||
assert_eq!(
|
||||
result,
|
||||
VrlValue::Bytes(Bytes::from("no digits".to_string()))
|
||||
);
|
||||
let input = VrlValue::Bytes(Bytes::from("".to_string()));
|
||||
let result = processor.process(&input).unwrap();
|
||||
assert_eq!(result, Value::String("".to_string()));
|
||||
assert_eq!(result, VrlValue::Bytes(Bytes::from("".to_string())));
|
||||
}
|
||||
}
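The digest hunks above still run each preset pattern over a Cow so untouched input is never reallocated; only the return type changes to VrlValue::Bytes. A minimal sketch of that loop, assuming the regex and vrl crates; the IP pattern in the usage stands in for PresetPattern:

use std::borrow::Cow;

use regex::Regex;
use vrl::prelude::Bytes;
use vrl::value::Value as VrlValue;

fn digest(val: &str, patterns: &[Regex]) -> VrlValue {
    let mut input = Cow::from(val);
    for pattern in patterns {
        // Only reallocate when the pattern actually matched something.
        if let Cow::Owned(stripped) = pattern.replace_all(&input, "") {
            input = Cow::Owned(stripped);
        }
    }
    VrlValue::Bytes(Bytes::from(input.to_string()))
}

fn main() {
    let ip = Regex::new(r"\b\d{1,3}(\.\d{1,3}){3}\b").unwrap();
    assert_eq!(
        digest("request from 192.168.1.1 failed", &[ip]),
        VrlValue::Bytes(Bytes::from("request from  failed"))
    );
}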
@@ -17,6 +17,8 @@ use std::ops::Deref;
|
||||
use ahash::{HashMap, HashMapExt, HashSet, HashSetExt};
|
||||
use itertools::Itertools;
|
||||
use snafu::OptionExt;
|
||||
use vrl::prelude::Bytes;
|
||||
use vrl::value::{KeyString, Value as VrlValue};
|
||||
|
||||
use crate::error::{
|
||||
DissectAppendOrderAlreadySetSnafu, DissectConsecutiveNamesSnafu, DissectEmptyPatternSnafu,
|
||||
@@ -24,13 +26,13 @@ use crate::error::{
|
||||
DissectNoMatchingPatternSnafu, DissectOrderOnlyAppendModifierSnafu,
|
||||
DissectOrderOnlyAppendSnafu, DissectSplitExceedsInputSnafu, DissectSplitNotMatchInputSnafu,
|
||||
Error, KeyMustBeStringSnafu, ProcessorExpectStringSnafu, ProcessorMissingFieldSnafu, Result,
|
||||
ValueMustBeMapSnafu,
|
||||
};
|
||||
use crate::etl::field::Fields;
|
||||
use crate::etl::processor::{
|
||||
yaml_bool, yaml_new_field, yaml_new_fields, yaml_parse_string, yaml_parse_strings, yaml_string,
|
||||
Processor, FIELDS_NAME, FIELD_NAME, IGNORE_MISSING_NAME, PATTERNS_NAME, PATTERN_NAME,
|
||||
};
|
||||
use crate::etl::value::Value;
|
||||
|
||||
pub(crate) const PROCESSOR_DISSECT: &str = "dissect";
|
||||
|
||||
@@ -421,7 +423,7 @@ impl DissectProcessor {
|
||||
name: &'a Name,
|
||||
value: String,
|
||||
appends: &mut HashMap<&'a String, Vec<(String, u32)>>,
|
||||
map: &mut Vec<(&'a String, Value)>,
|
||||
map: &mut Vec<(&'a String, VrlValue)>,
|
||||
) {
|
||||
match name.start_modifier {
|
||||
Some(StartModifier::NamedSkip) => {
|
||||
@@ -438,12 +440,16 @@ impl DissectProcessor {
|
||||
// because transform can know the key name
|
||||
}
|
||||
None => {
|
||||
map.push((&name.name, Value::String(value)));
|
||||
map.push((&name.name, VrlValue::Bytes(Bytes::from(value))));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn process_pattern(&self, chs: &[char], pattern: &Pattern) -> Result<Vec<(String, Value)>> {
|
||||
fn process_pattern(
|
||||
&self,
|
||||
chs: &[char],
|
||||
pattern: &Pattern,
|
||||
) -> Result<Vec<(KeyString, VrlValue)>> {
|
||||
let mut map = Vec::new();
|
||||
let mut pos = 0;
|
||||
|
||||
@@ -523,14 +529,17 @@ impl DissectProcessor {
|
||||
for (name, mut values) in appends {
|
||||
values.sort_by(|a, b| a.1.cmp(&b.1));
|
||||
let value = values.into_iter().map(|(a, _)| a).join(sep);
|
||||
map.push((name, Value::String(value)));
|
||||
map.push((name, VrlValue::Bytes(Bytes::from(value))));
|
||||
}
|
||||
}
|
||||
|
||||
Ok(map.into_iter().map(|(k, v)| (k.to_string(), v)).collect())
|
||||
Ok(map
|
||||
.into_iter()
|
||||
.map(|(k, v)| (KeyString::from(k.clone()), v))
|
||||
.collect())
|
||||
}
|
||||
|
||||
fn process(&self, val: &str) -> Result<Vec<(String, Value)>> {
|
||||
fn process(&self, val: &str) -> Result<Vec<(KeyString, VrlValue)>> {
|
||||
let chs = val.chars().collect::<Vec<char>>();
|
||||
|
||||
for pattern in &self.patterns {
|
||||
@@ -600,17 +609,18 @@ impl Processor for DissectProcessor {
|
||||
self.ignore_missing
|
||||
}
|
||||
|
||||
fn exec_mut(&self, mut val: Value) -> Result<Value> {
|
||||
fn exec_mut(&self, mut val: VrlValue) -> Result<VrlValue> {
|
||||
for field in self.fields.iter() {
|
||||
let index = field.input_field();
|
||||
let val = val.as_object_mut().context(ValueMustBeMapSnafu)?;
|
||||
match val.get(index) {
|
||||
Some(Value::String(val_str)) => {
|
||||
let r = self.process(val_str)?;
|
||||
Some(VrlValue::Bytes(val_str)) => {
|
||||
let r = self.process(String::from_utf8_lossy(val_str).as_ref())?;
|
||||
for (k, v) in r {
|
||||
val.insert(k, v)?;
|
||||
val.insert(k, v);
|
||||
}
|
||||
}
|
||||
Some(Value::Null) | None => {
|
||||
Some(VrlValue::Null) | None => {
|
||||
if !self.ignore_missing {
|
||||
return ProcessorMissingFieldSnafu {
|
||||
processor: self.kind(),
|
||||
@@ -639,17 +649,18 @@ fn is_valid_char(ch: char) -> bool {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use ahash::HashMap;
|
||||
use vrl::prelude::Bytes;
|
||||
use vrl::value::{KeyString, Value as VrlValue};
|
||||
|
||||
use super::{DissectProcessor, EndModifier, Name, Part, StartModifier};
|
||||
use crate::etl::processor::dissect::Pattern;
|
||||
use crate::etl::value::Value;
|
||||
|
||||
fn assert(pattern_str: &str, input: &str, expected: HashMap<String, Value>) {
|
||||
fn assert(pattern_str: &str, input: &str, expected: HashMap<KeyString, VrlValue>) {
|
||||
let chs = input.chars().collect::<Vec<char>>();
|
||||
let patterns: Vec<Pattern> = vec![pattern_str.parse().unwrap()];
|
||||
|
||||
let processor = DissectProcessor::default();
|
||||
let result: HashMap<String, Value> = processor
|
||||
let result: HashMap<KeyString, VrlValue> = processor
|
||||
.process_pattern(&chs, &patterns[0])
|
||||
.unwrap()
|
||||
.into_iter()
|
||||
@@ -991,8 +1002,13 @@ mod tests {
|
||||
("httpversion", "1.0"),
|
||||
]
|
||||
.into_iter()
|
||||
.map(|(k, v)| (k.to_string(), Value::String(v.to_string())))
|
||||
.collect::<HashMap<String, Value>>();
|
||||
.map(|(k, v)| {
|
||||
(
|
||||
KeyString::from(k.to_string()),
|
||||
VrlValue::Bytes(Bytes::from(v.to_string())),
|
||||
)
|
||||
})
|
||||
.collect::<HashMap<KeyString, VrlValue>>();
|
||||
|
||||
{
|
||||
// pattern start with Name
|
||||
@@ -1032,9 +1048,12 @@ mod tests {
|
||||
]
|
||||
.into_iter()
|
||||
.map(|(pattern, input, expected)| {
|
||||
let map = expected
|
||||
.into_iter()
|
||||
.map(|(k, v)| (k.to_string(), Value::String(v.to_string())));
|
||||
let map = expected.into_iter().map(|(k, v)| {
|
||||
(
|
||||
KeyString::from(k.to_string()),
|
||||
VrlValue::Bytes(Bytes::from(v.to_string())),
|
||||
)
|
||||
});
|
||||
(pattern, input, map)
|
||||
});
|
||||
|
||||
@@ -1042,7 +1061,7 @@ mod tests {
|
||||
assert(
|
||||
pattern_str,
|
||||
input,
|
||||
expected.collect::<HashMap<String, Value>>(),
|
||||
expected.collect::<HashMap<KeyString, VrlValue>>(),
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -1063,9 +1082,12 @@ mod tests {
|
||||
]
|
||||
.into_iter()
|
||||
.map(|(pattern, input, expected)| {
|
||||
let map = expected
|
||||
.into_iter()
|
||||
.map(|(k, v)| (k.to_string(), Value::String(v.to_string())));
|
||||
let map = expected.into_iter().map(|(k, v)| {
|
||||
(
|
||||
KeyString::from(k.to_string()),
|
||||
VrlValue::Bytes(Bytes::from(v.to_string())),
|
||||
)
|
||||
});
|
||||
(pattern, input, map)
|
||||
});
|
||||
|
||||
@@ -1073,7 +1095,7 @@ mod tests {
|
||||
assert(
|
||||
pattern_str,
|
||||
input,
|
||||
expected.collect::<HashMap<String, Value>>(),
|
||||
expected.collect::<HashMap<KeyString, VrlValue>>(),
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -1090,9 +1112,12 @@ mod tests {
|
||||
)]
|
||||
.into_iter()
|
||||
.map(|(pattern, input, expected)| {
|
||||
let map = expected
|
||||
.into_iter()
|
||||
.map(|(k, v)| (k.to_string(), Value::String(v.to_string())));
|
||||
let map = expected.into_iter().map(|(k, v)| {
|
||||
(
|
||||
KeyString::from(k.to_string()),
|
||||
VrlValue::Bytes(Bytes::from(v.to_string())),
|
||||
)
|
||||
});
|
||||
(pattern, input, map)
|
||||
});
|
||||
|
||||
@@ -1100,7 +1125,7 @@ mod tests {
|
||||
assert(
|
||||
pattern_str,
|
||||
input,
|
||||
expected.collect::<HashMap<String, Value>>(),
|
||||
expected.collect::<HashMap<KeyString, VrlValue>>(),
|
||||
);
|
||||
}
|
||||
}
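In the dissect hunks above, append captures are still sorted by their order modifier and joined with the configured separator before being stored as VrlValue::Bytes. A minimal sketch of that merge, assuming the vrl crate; join_appends is an illustrative name:

use vrl::prelude::Bytes;
use vrl::value::Value as VrlValue;

fn join_appends(mut values: Vec<(String, u32)>, sep: &str) -> VrlValue {
    // Captures carry an explicit order; sort on it before joining.
    values.sort_by(|a, b| a.1.cmp(&b.1));
    let joined = values
        .into_iter()
        .map(|(v, _)| v)
        .collect::<Vec<_>>()
        .join(sep);
    VrlValue::Bytes(Bytes::from(joined))
}

fn main() {
    let parts = vec![("world".to_string(), 2), ("hello".to_string(), 1)];
    assert_eq!(
        join_appends(parts, " "),
        VrlValue::Bytes(Bytes::from("hello world"))
    );
}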
@@ -12,24 +12,26 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use chrono::{DateTime, Utc};
|
||||
use common_time::timestamp::TimeUnit;
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
use vrl::value::{KeyString, Value as VrlValue};
|
||||
|
||||
use crate::error::{
|
||||
EpochInvalidResolutionSnafu, Error, FailedToParseIntSnafu, KeyMustBeStringSnafu,
|
||||
ProcessorMissingFieldSnafu, ProcessorUnsupportedValueSnafu, Result,
|
||||
EpochInvalidResolutionSnafu, Error, FailedToParseIntSnafu, InvalidEpochForResolutionSnafu,
|
||||
KeyMustBeStringSnafu, ProcessorMissingFieldSnafu, ProcessorUnsupportedValueSnafu, Result,
|
||||
ValueMustBeMapSnafu,
|
||||
};
|
||||
use crate::etl::field::Fields;
|
||||
use crate::etl::processor::{
|
||||
yaml_bool, yaml_new_field, yaml_new_fields, yaml_string, Processor, FIELDS_NAME, FIELD_NAME,
|
||||
IGNORE_MISSING_NAME,
|
||||
};
|
||||
use crate::etl::value::time::{
|
||||
use crate::etl::value::{
|
||||
MICROSECOND_RESOLUTION, MICRO_RESOLUTION, MILLISECOND_RESOLUTION, MILLI_RESOLUTION,
|
||||
MS_RESOLUTION, NANOSECOND_RESOLUTION, NANO_RESOLUTION, NS_RESOLUTION, SECOND_RESOLUTION,
|
||||
SEC_RESOLUTION, S_RESOLUTION, US_RESOLUTION,
|
||||
};
|
||||
use crate::etl::value::{Timestamp, Value};
|
||||
|
||||
pub(crate) const PROCESSOR_EPOCH: &str = "epoch";
|
||||
const RESOLUTION_NAME: &str = "resolution";
|
||||
@@ -43,6 +45,18 @@ pub(crate) enum Resolution {
|
||||
Nano,
|
||||
}
|
||||
|
||||
impl std::fmt::Display for Resolution {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
let text = match self {
|
||||
Resolution::Second => SECOND_RESOLUTION,
|
||||
Resolution::Milli => MILLISECOND_RESOLUTION,
|
||||
Resolution::Micro => MICROSECOND_RESOLUTION,
|
||||
Resolution::Nano => NANOSECOND_RESOLUTION,
|
||||
};
|
||||
write!(f, "{}", text)
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<&str> for Resolution {
|
||||
type Error = Error;
|
||||
|
||||
@@ -84,43 +98,36 @@ pub struct EpochProcessor {
|
||||
}
|
||||
|
||||
impl EpochProcessor {
|
||||
fn parse(&self, val: &Value) -> Result<Timestamp> {
|
||||
let t: i64 = match val {
|
||||
Value::String(s) => s
|
||||
.parse::<i64>()
|
||||
.context(FailedToParseIntSnafu { value: s })?,
|
||||
Value::Int16(i) => *i as i64,
|
||||
Value::Int32(i) => *i as i64,
|
||||
Value::Int64(i) => *i,
|
||||
Value::Uint8(i) => *i as i64,
|
||||
Value::Uint16(i) => *i as i64,
|
||||
Value::Uint32(i) => *i as i64,
|
||||
Value::Uint64(i) => *i as i64,
|
||||
Value::Float32(f) => *f as i64,
|
||||
Value::Float64(f) => *f as i64,
|
||||
|
||||
Value::Timestamp(t) => match self.resolution {
|
||||
Resolution::Second => t.timestamp(),
|
||||
Resolution::Milli => t.timestamp_millis(),
|
||||
Resolution::Micro => t.timestamp_micros(),
|
||||
Resolution::Nano => t.timestamp_nanos(),
|
||||
},
|
||||
|
||||
_ => {
|
||||
return ProcessorUnsupportedValueSnafu {
|
||||
processor: PROCESSOR_EPOCH,
|
||||
val: val.to_string(),
|
||||
fn parse(&self, val: &VrlValue) -> Result<DateTime<Utc>> {
|
||||
let t: i64 =
|
||||
match val {
|
||||
VrlValue::Bytes(bytes) => String::from_utf8_lossy(bytes).parse::<i64>().context(
|
||||
FailedToParseIntSnafu {
|
||||
value: val.to_string_lossy(),
|
||||
},
|
||||
)?,
|
||||
VrlValue::Integer(ts) => *ts,
|
||||
VrlValue::Float(not_nan) => not_nan.into_inner() as i64,
|
||||
VrlValue::Timestamp(date_time) => return Ok(*date_time),
|
||||
_ => {
|
||||
return ProcessorUnsupportedValueSnafu {
|
||||
processor: PROCESSOR_EPOCH,
|
||||
val: val.to_string(),
|
||||
}
|
||||
.fail();
|
||||
}
|
||||
.fail();
|
||||
}
|
||||
};
|
||||
};
|
||||
|
||||
match self.resolution {
|
||||
Resolution::Second => Ok(Timestamp::Second(t)),
|
||||
Resolution::Milli => Ok(Timestamp::Millisecond(t)),
|
||||
Resolution::Micro => Ok(Timestamp::Microsecond(t)),
|
||||
Resolution::Nano => Ok(Timestamp::Nanosecond(t)),
|
||||
Resolution::Second => DateTime::from_timestamp(t, 0),
|
||||
Resolution::Milli => DateTime::from_timestamp_millis(t),
|
||||
Resolution::Micro => DateTime::from_timestamp_micros(t),
|
||||
Resolution::Nano => Some(DateTime::from_timestamp_nanos(t)),
|
||||
}
|
||||
.context(InvalidEpochForResolutionSnafu {
|
||||
value: t,
|
||||
resolution: self.resolution.to_string(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -174,11 +181,12 @@ impl Processor for EpochProcessor {
|
||||
self.ignore_missing
|
||||
}
|
||||
|
||||
fn exec_mut(&self, mut val: Value) -> Result<Value> {
|
||||
fn exec_mut(&self, mut val: VrlValue) -> Result<VrlValue> {
|
||||
for field in self.fields.iter() {
|
||||
let index = field.input_field();
|
||||
let val = val.as_object_mut().context(ValueMustBeMapSnafu)?;
|
||||
match val.get(index) {
|
||||
Some(Value::Null) | None => {
|
||||
Some(VrlValue::Null) | None => {
|
||||
if !self.ignore_missing {
|
||||
return ProcessorMissingFieldSnafu {
|
||||
processor: self.kind(),
|
||||
@@ -190,7 +198,10 @@ impl Processor for EpochProcessor {
|
||||
Some(v) => {
|
||||
let timestamp = self.parse(v)?;
|
||||
let output_index = field.target_or_input_field();
|
||||
val.insert(output_index.to_string(), Value::Timestamp(timestamp))?;
|
||||
val.insert(
|
||||
KeyString::from(output_index.to_string()),
|
||||
VrlValue::Timestamp(timestamp),
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -200,8 +211,12 @@ impl Processor for EpochProcessor {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use chrono::DateTime;
|
||||
use ordered_float::NotNan;
|
||||
use vrl::prelude::Bytes;
|
||||
use vrl::value::Value as VrlValue;
|
||||
|
||||
use super::EpochProcessor;
|
||||
use crate::etl::value::Value;
|
||||
|
||||
#[test]
|
||||
fn test_parse_epoch() {
|
||||
@@ -211,15 +226,15 @@ mod tests {
|
||||
};
|
||||
|
||||
let values = [
|
||||
Value::String("1573840000".into()),
|
||||
Value::Int32(1573840000),
|
||||
Value::Uint64(1573840000),
|
||||
Value::Float32(1573840000.0),
|
||||
VrlValue::Bytes(Bytes::from("1573840000")),
|
||||
VrlValue::Integer(1573840000),
|
||||
VrlValue::Integer(1573840000),
|
||||
VrlValue::Float(NotNan::new(1573840000.0).unwrap()),
|
||||
];
|
||||
|
||||
for value in values {
|
||||
let parsed = processor.parse(&value).unwrap();
|
||||
assert_eq!(parsed, super::Timestamp::Second(1573840000));
|
||||
assert_eq!(parsed, DateTime::from_timestamp(1573840000, 0).unwrap());
|
||||
}
|
||||
}
|
||||
}
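The epoch hunks above replace the internal Timestamp enum with chrono DateTime<Utc>, built from the integer according to the configured resolution. A minimal sketch, assuming the chrono crate; the Resolution enum here mirrors the processor's but is redeclared for the example:

use chrono::{DateTime, Utc};

enum Resolution {
    Second,
    Milli,
    Micro,
    Nano,
}

fn epoch_to_datetime(t: i64, resolution: &Resolution) -> Option<DateTime<Utc>> {
    match resolution {
        Resolution::Second => DateTime::from_timestamp(t, 0),
        Resolution::Milli => DateTime::from_timestamp_millis(t),
        Resolution::Micro => DateTime::from_timestamp_micros(t),
        // Every i64 nanosecond count maps to a valid DateTime, so this is infallible.
        Resolution::Nano => Some(DateTime::from_timestamp_nanos(t)),
    }
}

fn main() {
    let dt = epoch_to_datetime(1_573_840_000, &Resolution::Second).unwrap();
    assert_eq!(dt, DateTime::from_timestamp(1_573_840_000, 0).unwrap());
}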
@@ -14,17 +14,19 @@
|
||||
|
||||
use regex::Regex;
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
use vrl::prelude::Bytes;
|
||||
use vrl::value::{KeyString, Value as VrlValue};
|
||||
|
||||
use crate::error::{
|
||||
Error, GsubPatternRequiredSnafu, GsubReplacementRequiredSnafu, KeyMustBeStringSnafu,
|
||||
ProcessorExpectStringSnafu, ProcessorMissingFieldSnafu, RegexSnafu, Result,
|
||||
ValueMustBeMapSnafu,
|
||||
};
|
||||
use crate::etl::field::Fields;
|
||||
use crate::etl::processor::{
|
||||
yaml_bool, yaml_new_field, yaml_new_fields, yaml_string, FIELDS_NAME, FIELD_NAME,
|
||||
IGNORE_MISSING_NAME, PATTERN_NAME,
|
||||
};
|
||||
use crate::etl::value::Value;
|
||||
|
||||
pub(crate) const PROCESSOR_GSUB: &str = "gsub";
|
||||
|
||||
@@ -40,16 +42,16 @@ pub struct GsubProcessor {
|
||||
}
|
||||
|
||||
impl GsubProcessor {
|
||||
fn process_string(&self, val: &str) -> Result<Value> {
|
||||
fn process_string(&self, val: &str) -> Result<VrlValue> {
|
||||
let new_val = self.pattern.replace_all(val, &self.replacement).to_string();
|
||||
let val = Value::String(new_val);
|
||||
let val = VrlValue::Bytes(Bytes::from(new_val));
|
||||
|
||||
Ok(val)
|
||||
}
|
||||
|
||||
fn process(&self, val: &Value) -> Result<Value> {
|
||||
fn process(&self, val: &VrlValue) -> Result<VrlValue> {
|
||||
match val {
|
||||
Value::String(val) => self.process_string(val),
|
||||
VrlValue::Bytes(val) => self.process_string(String::from_utf8_lossy(val).as_ref()),
|
||||
_ => ProcessorExpectStringSnafu {
|
||||
processor: PROCESSOR_GSUB,
|
||||
v: val.clone(),
|
||||
@@ -117,11 +119,12 @@ impl crate::etl::processor::Processor for GsubProcessor {
|
||||
self.ignore_missing
|
||||
}
|
||||
|
||||
fn exec_mut(&self, mut val: Value) -> Result<Value> {
|
||||
fn exec_mut(&self, mut val: VrlValue) -> Result<VrlValue> {
|
||||
for field in self.fields.iter() {
|
||||
let index = field.input_field();
|
||||
let val = val.as_object_mut().context(ValueMustBeMapSnafu)?;
|
||||
match val.get(index) {
|
||||
Some(Value::Null) | None => {
|
||||
Some(VrlValue::Null) | None => {
|
||||
if !self.ignore_missing {
|
||||
return ProcessorMissingFieldSnafu {
|
||||
processor: self.kind(),
|
||||
@@ -133,7 +136,7 @@ impl crate::etl::processor::Processor for GsubProcessor {
|
||||
Some(v) => {
|
||||
let result = self.process(v)?;
|
||||
let output_index = field.target_or_input_field();
|
||||
val.insert(output_index.to_string(), result)?;
|
||||
val.insert(KeyString::from(output_index.to_string()), result);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -145,7 +148,6 @@ impl crate::etl::processor::Processor for GsubProcessor {
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::etl::processor::gsub::GsubProcessor;
|
||||
use crate::etl::value::Value;
|
||||
|
||||
#[test]
|
||||
fn test_string_value() {
|
||||
@@ -156,9 +158,9 @@ mod tests {
|
||||
ignore_missing: false,
|
||||
};
|
||||
|
||||
let val = Value::String("123".to_string());
|
||||
let val = VrlValue::Bytes(Bytes::from("123"));
|
||||
let result = processor.process(&val).unwrap();
|
||||
|
||||
assert_eq!(result, Value::String("xxx".to_string()));
|
||||
assert_eq!(result, VrlValue::Bytes(Bytes::from("xxx")));
|
||||
}
|
||||
}
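The gsub hunks above keep the replace_all call and only change the output to VrlValue::Bytes. A minimal sketch, assuming the regex and vrl crates:

use regex::Regex;
use vrl::prelude::Bytes;
use vrl::value::Value as VrlValue;

fn gsub(val: &str, pattern: &Regex, replacement: &str) -> VrlValue {
    VrlValue::Bytes(Bytes::from(
        pattern.replace_all(val, replacement).to_string(),
    ))
}

fn main() {
    let digits = Regex::new(r"\d+").unwrap();
    assert_eq!(
        gsub("123", &digits, "xxx"),
        VrlValue::Bytes(Bytes::from("xxx"))
    );
}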
@@ -13,17 +13,18 @@
|
||||
// limitations under the License.
|
||||
|
||||
use snafu::OptionExt;
|
||||
use vrl::prelude::Bytes;
|
||||
use vrl::value::{KeyString, Value as VrlValue};
|
||||
|
||||
use crate::error::{
|
||||
Error, JoinSeparatorRequiredSnafu, KeyMustBeStringSnafu, ProcessorExpectStringSnafu,
|
||||
ProcessorMissingFieldSnafu, Result,
|
||||
ProcessorMissingFieldSnafu, Result, ValueMustBeMapSnafu,
|
||||
};
|
||||
use crate::etl::field::Fields;
|
||||
use crate::etl::processor::{
|
||||
yaml_bool, yaml_new_field, yaml_new_fields, yaml_string, Processor, FIELDS_NAME, FIELD_NAME,
|
||||
IGNORE_MISSING_NAME, SEPARATOR_NAME,
|
||||
};
|
||||
use crate::etl::value::{Array, Value};
|
||||
|
||||
pub(crate) const PROCESSOR_JOIN: &str = "join";
|
||||
|
||||
@@ -36,14 +37,14 @@ pub struct JoinProcessor {
|
||||
}
|
||||
|
||||
impl JoinProcessor {
|
||||
fn process(&self, arr: &Array) -> Result<Value> {
|
||||
fn process(&self, arr: &[VrlValue]) -> Result<VrlValue> {
|
||||
let val = arr
|
||||
.iter()
|
||||
.map(|v| v.to_str_value())
|
||||
.collect::<Vec<String>>()
|
||||
.map(|v| v.to_string_lossy())
|
||||
.collect::<Vec<_>>()
|
||||
.join(&self.separator);
|
||||
|
||||
Ok(Value::String(val))
|
||||
Ok(VrlValue::Bytes(Bytes::from(val)))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -94,16 +95,17 @@ impl Processor for JoinProcessor {
|
||||
self.ignore_missing
|
||||
}
|
||||
|
||||
fn exec_mut(&self, mut val: Value) -> Result<Value> {
|
||||
fn exec_mut(&self, mut val: VrlValue) -> Result<VrlValue> {
|
||||
for field in self.fields.iter() {
|
||||
let index = field.input_field();
|
||||
let val = val.as_object_mut().context(ValueMustBeMapSnafu)?;
|
||||
match val.get(index) {
|
||||
Some(Value::Array(arr)) => {
|
||||
Some(VrlValue::Array(arr)) => {
|
||||
let result = self.process(arr)?;
|
||||
let output_index = field.target_or_input_field();
|
||||
val.insert(output_index.to_string(), result)?;
|
||||
val.insert(KeyString::from(output_index.to_string()), result);
|
||||
}
|
||||
Some(Value::Null) | None => {
|
||||
Some(VrlValue::Null) | None => {
|
||||
if !self.ignore_missing {
|
||||
return ProcessorMissingFieldSnafu {
|
||||
processor: self.kind(),
|
||||
@@ -129,8 +131,10 @@ impl Processor for JoinProcessor {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
|
||||
use vrl::prelude::Bytes;
|
||||
use vrl::value::Value as VrlValue;
|
||||
|
||||
use crate::etl::processor::join::JoinProcessor;
|
||||
use crate::etl::value::Value;
|
||||
|
||||
#[test]
|
||||
fn test_join_processor() {
|
||||
@@ -140,11 +144,10 @@ mod tests {
|
||||
};
|
||||
|
||||
let arr = vec![
|
||||
Value::String("a".to_string()),
|
||||
Value::String("b".to_string()),
|
||||
]
|
||||
.into();
|
||||
VrlValue::Bytes(Bytes::from("a")),
|
||||
VrlValue::Bytes(Bytes::from("b")),
|
||||
];
|
||||
let result = processor.process(&arr).unwrap();
|
||||
assert_eq!(result, Value::String("a-b".to_string()));
|
||||
assert_eq!(result, VrlValue::Bytes(Bytes::from("a-b")));
|
||||
}
|
||||
}
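The join hunks above now accept a plain slice of VrlValue and use to_string_lossy for each element before joining. A minimal sketch, assuming the vrl crate:

use vrl::prelude::Bytes;
use vrl::value::Value as VrlValue;

fn join(arr: &[VrlValue], separator: &str) -> VrlValue {
    let joined = arr
        .iter()
        .map(|v| v.to_string_lossy())
        .collect::<Vec<_>>()
        .join(separator);
    VrlValue::Bytes(Bytes::from(joined))
}

fn main() {
    let arr = vec![
        VrlValue::Bytes(Bytes::from("a")),
        VrlValue::Bytes(Bytes::from("b")),
    ];
    assert_eq!(join(&arr, "-"), VrlValue::Bytes(Bytes::from("a-b")));
}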
@@ -13,16 +13,17 @@
|
||||
// limitations under the License.
|
||||
|
||||
use snafu::{OptionExt as _, ResultExt};
|
||||
use vrl::value::{KeyString, Value as VrlValue};
|
||||
|
||||
use crate::error::{
|
||||
Error, FieldMustBeTypeSnafu, JsonParseSnafu, KeyMustBeStringSnafu, ProcessorMissingFieldSnafu,
|
||||
ProcessorUnsupportedValueSnafu, Result,
|
||||
ProcessorUnsupportedValueSnafu, Result, ValueMustBeMapSnafu,
|
||||
};
|
||||
use crate::etl::field::Fields;
|
||||
use crate::etl::processor::{
|
||||
yaml_bool, yaml_new_field, yaml_new_fields, FIELDS_NAME, FIELD_NAME, IGNORE_MISSING_NAME,
|
||||
};
|
||||
use crate::{json_to_map, Processor, Value};
|
||||
use crate::Processor;
|
||||
|
||||
pub(crate) const PROCESSOR_JSON_PARSE: &str = "json_parse";
|
||||
|
||||
@@ -67,21 +68,21 @@ impl TryFrom<&yaml_rust::yaml::Hash> for JsonParseProcessor {
|
||||
}
|
||||
|
||||
impl JsonParseProcessor {
|
||||
fn process_field(&self, val: &Value) -> Result<Value> {
|
||||
fn process_field(&self, val: &VrlValue) -> Result<VrlValue> {
|
||||
let Some(json_str) = val.as_str() else {
|
||||
return FieldMustBeTypeSnafu {
|
||||
field: val.to_str_type(),
|
||||
field: val.to_string(),
|
||||
ty: "string",
|
||||
}
|
||||
.fail();
|
||||
};
|
||||
let parsed: serde_json::Value = serde_json::from_str(json_str).context(JsonParseSnafu)?;
|
||||
let parsed: VrlValue = serde_json::from_str(&json_str).context(JsonParseSnafu)?;
|
||||
match parsed {
|
||||
serde_json::Value::Object(_) => Ok(json_to_map(parsed)?),
|
||||
serde_json::Value::Array(arr) => Ok(Value::Array(arr.try_into()?)),
|
||||
VrlValue::Object(_) => Ok(parsed),
|
||||
VrlValue::Array(_) => Ok(parsed),
|
||||
_ => ProcessorUnsupportedValueSnafu {
|
||||
processor: self.kind(),
|
||||
val: val.to_str_type(),
|
||||
val: val.to_string(),
|
||||
}
|
||||
.fail(),
|
||||
}
|
||||
@@ -97,14 +98,15 @@ impl Processor for JsonParseProcessor {
|
||||
self.ignore_missing
|
||||
}
|
||||
|
||||
fn exec_mut(&self, mut val: Value) -> Result<Value> {
|
||||
fn exec_mut(&self, mut val: VrlValue) -> Result<VrlValue> {
|
||||
for field in self.fields.iter() {
|
||||
let index = field.input_field();
|
||||
let val = val.as_object_mut().context(ValueMustBeMapSnafu)?;
|
||||
match val.get(index) {
|
||||
Some(v) => {
|
||||
let processed = self.process_field(v)?;
|
||||
let output_index = field.target_or_input_field();
|
||||
val.insert(output_index.to_string(), processed)?;
|
||||
val.insert(KeyString::from(output_index.to_string()), processed);
|
||||
}
|
||||
None => {
|
||||
if !self.ignore_missing {
|
||||
@@ -123,24 +125,27 @@ impl Processor for JsonParseProcessor {
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
use vrl::prelude::Bytes;
|
||||
use vrl::value::{KeyString, Value as VrlValue};
|
||||
|
||||
use crate::etl::processor::json_parse::JsonParseProcessor;
|
||||
|
||||
#[test]
|
||||
fn test_json_parse() {
|
||||
use super::*;
|
||||
use crate::Value;
|
||||
|
||||
let processor = JsonParseProcessor {
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let result = processor
|
||||
.process_field(&Value::String(r#"{"hello": "world"}"#.to_string()))
|
||||
.process_field(&VrlValue::Bytes(Bytes::from(r#"{"hello": "world"}"#)))
|
||||
.unwrap();
|
||||
|
||||
let expected = Value::Map(crate::Map::one(
|
||||
"hello".to_string(),
|
||||
Value::String("world".to_string()),
|
||||
));
|
||||
let expected = VrlValue::Object(BTreeMap::from([(
|
||||
KeyString::from("hello"),
|
||||
VrlValue::Bytes(Bytes::from("world")),
|
||||
)]));
|
||||
|
||||
assert_eq!(result, expected);
|
||||
}
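The json_parse hunks above deserialize the JSON string directly into a VrlValue with serde_json (the real processor additionally rejects anything that is not an object or an array). A minimal sketch, assuming the serde_json and vrl crates:

use vrl::prelude::Bytes;
use vrl::value::{KeyString, Value as VrlValue};

fn parse_json_field(json_str: &str) -> Result<VrlValue, serde_json::Error> {
    // VrlValue implements Deserialize, so the string parses straight into it;
    // JSON strings come back as VrlValue::Bytes.
    serde_json::from_str(json_str)
}

fn main() {
    let parsed = parse_json_field(r#"{"hello": "world"}"#).unwrap();
    let expected = VrlValue::Object(
        [(KeyString::from("hello"), VrlValue::Bytes(Bytes::from("world")))]
            .into_iter()
            .collect(),
    );
    assert_eq!(parsed, expected);
}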
@@ -14,17 +14,17 @@
|
||||
|
||||
use jsonpath_rust::JsonPath;
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
use vrl::value::{KeyString, Value as VrlValue};
|
||||
|
||||
use crate::error::{
|
||||
Error, JsonPathParseResultIndexSnafu, JsonPathParseSnafu, KeyMustBeStringSnafu,
|
||||
ProcessorMissingFieldSnafu, Result,
|
||||
Error, JsonParseSnafu, JsonPathParseResultIndexSnafu, JsonPathParseSnafu, KeyMustBeStringSnafu,
|
||||
ProcessorMissingFieldSnafu, Result, ValueMustBeMapSnafu,
|
||||
};
|
||||
use crate::etl::field::Fields;
|
||||
use crate::etl::processor::{
|
||||
yaml_bool, yaml_new_field, yaml_new_fields, yaml_string, Processor, FIELDS_NAME, FIELD_NAME,
|
||||
IGNORE_MISSING_NAME, JSON_PATH_NAME, JSON_PATH_RESULT_INDEX_NAME,
|
||||
};
|
||||
use crate::Value;
|
||||
|
||||
pub(crate) const PROCESSOR_JSON_PATH: &str = "json_path";
|
||||
|
||||
@@ -84,7 +84,7 @@ impl TryFrom<&yaml_rust::yaml::Hash> for JsonPathProcessor {
|
||||
#[derive(Debug)]
|
||||
pub struct JsonPathProcessor {
|
||||
fields: Fields,
|
||||
json_path: JsonPath<Value>,
|
||||
json_path: JsonPath<serde_json::Value>,
|
||||
ignore_missing: bool,
|
||||
result_index: Option<usize>,
|
||||
}
|
||||
@@ -101,17 +101,22 @@ impl Default for JsonPathProcessor {
|
||||
}
|
||||
|
||||
impl JsonPathProcessor {
|
||||
fn process_field(&self, val: &Value) -> Result<Value> {
|
||||
let processed = self.json_path.find(val);
|
||||
match processed {
|
||||
Value::Array(arr) => {
|
||||
fn process_field(&self, val: &VrlValue) -> Result<VrlValue> {
|
||||
let v = serde_json::to_value(val).context(JsonParseSnafu)?;
|
||||
let p = self.json_path.find(&v);
|
||||
match p {
|
||||
serde_json::Value::Array(arr) => {
|
||||
if let Some(index) = self.result_index {
|
||||
Ok(arr.get(index).cloned().unwrap_or(Value::Null))
|
||||
Ok(arr
|
||||
.get(index)
|
||||
.cloned()
|
||||
.map(|v| v.into())
|
||||
.unwrap_or(VrlValue::Null))
|
||||
} else {
|
||||
Ok(Value::Array(arr))
|
||||
Ok(VrlValue::Array(arr.into_iter().map(|v| v.into()).collect()))
|
||||
}
|
||||
}
|
||||
v => Ok(v),
|
||||
v => Ok(v.into()),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -125,14 +130,15 @@ impl Processor for JsonPathProcessor {
|
||||
self.ignore_missing
|
||||
}
|
||||
|
||||
fn exec_mut(&self, mut val: Value) -> Result<Value> {
|
||||
fn exec_mut(&self, mut val: VrlValue) -> Result<VrlValue> {
|
||||
for field in self.fields.iter() {
|
||||
let index = field.input_field();
|
||||
let val = val.as_object_mut().context(ValueMustBeMapSnafu)?;
|
||||
match val.get(index) {
|
||||
Some(v) => {
|
||||
let processed = self.process_field(v)?;
|
||||
let output_index = field.target_or_input_field();
|
||||
val.insert(output_index.to_string(), processed)?;
|
||||
val.insert(KeyString::from(output_index), processed);
|
||||
}
|
||||
None => {
|
||||
if !self.ignore_missing {
|
||||
@@ -151,12 +157,13 @@ impl Processor for JsonPathProcessor {
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use crate::Map;
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
use vrl::prelude::Bytes;
|
||||
|
||||
#[test]
|
||||
fn test_json_path() {
|
||||
use super::*;
|
||||
use crate::Value;
|
||||
|
||||
let json_path = JsonPath::try_from("$.hello").unwrap();
|
||||
let processor = JsonPathProcessor {
|
||||
@@ -166,11 +173,11 @@ mod test {
|
||||
};
|
||||
|
||||
let result = processor
|
||||
.process_field(&Value::Map(Map::one(
|
||||
"hello",
|
||||
Value::String("world".to_string()),
|
||||
)))
|
||||
.process_field(&VrlValue::Object(BTreeMap::from([(
|
||||
KeyString::from("hello"),
|
||||
VrlValue::Bytes(Bytes::from("world")),
|
||||
)])))
|
||||
.unwrap();
|
||||
assert_eq!(result, Value::String("world".to_string()));
|
||||
assert_eq!(result, VrlValue::Bytes(Bytes::from("world")));
|
||||
}
|
||||
}
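The json_path hunks above route the lookup through serde_json, since the jsonpath query runs on serde_json::Value and the result is converted back with From<serde_json::Value>. A minimal sketch of that round trip with result_index-style first-match selection, assuming the jsonpath_rust, serde_json and vrl crates; query_first is an illustrative name:

use jsonpath_rust::JsonPath;
use vrl::prelude::Bytes;
use vrl::value::{KeyString, Value as VrlValue};

fn query_first(val: &VrlValue, path: &JsonPath<serde_json::Value>) -> Option<VrlValue> {
    // VRL values are not queried directly: serialize to serde_json, run the
    // path, then convert the matches back into VrlValue.
    let as_json = serde_json::to_value(val).ok()?;
    match path.find(&as_json) {
        // find() collects the matches into an array; take the first one here,
        // which is what result_index = 0 does in the processor above.
        serde_json::Value::Array(matches) => matches.into_iter().next().map(Into::into),
        other => Some(other.into()),
    }
}

fn main() {
    let path = JsonPath::try_from("$.hello").unwrap();
    let object = VrlValue::Object(
        [(KeyString::from("hello"), VrlValue::Bytes(Bytes::from("world")))]
            .into_iter()
            .collect(),
    );
    assert_eq!(
        query_first(&object, &path),
        Some(VrlValue::Bytes(Bytes::from("world")))
    );
}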
@@ -13,17 +13,18 @@
|
||||
// limitations under the License.
|
||||
|
||||
use snafu::OptionExt;
|
||||
use vrl::prelude::Bytes;
|
||||
use vrl::value::{KeyString, Value as VrlValue};
|
||||
|
||||
use crate::error::{
|
||||
Error, KeyMustBeStringSnafu, LetterInvalidMethodSnafu, ProcessorExpectStringSnafu,
|
||||
ProcessorMissingFieldSnafu, Result,
|
||||
ProcessorMissingFieldSnafu, Result, ValueMustBeMapSnafu,
|
||||
};
|
||||
use crate::etl::field::Fields;
|
||||
use crate::etl::processor::{
|
||||
yaml_bool, yaml_new_field, yaml_new_fields, yaml_string, Processor, FIELDS_NAME, FIELD_NAME,
|
||||
IGNORE_MISSING_NAME, METHOD_NAME,
|
||||
};
|
||||
use crate::etl::value::Value;
|
||||
|
||||
pub(crate) const PROCESSOR_LETTER: &str = "letter";
|
||||
|
||||
@@ -67,15 +68,14 @@ pub struct LetterProcessor {
|
||||
}
|
||||
|
||||
impl LetterProcessor {
|
||||
fn process_field(&self, val: &str) -> Result<Value> {
|
||||
let processed = match self.method {
|
||||
Method::Upper => val.to_uppercase(),
|
||||
Method::Lower => val.to_lowercase(),
|
||||
Method::Capital => capitalize(val),
|
||||
};
|
||||
let val = Value::String(processed);
|
||||
|
||||
Ok(val)
|
||||
fn process_field(&self, val: &Bytes) -> VrlValue {
|
||||
match self.method {
|
||||
Method::Upper => VrlValue::Bytes(Bytes::from(val.to_ascii_uppercase())),
|
||||
Method::Lower => VrlValue::Bytes(Bytes::from(val.to_ascii_lowercase())),
|
||||
Method::Capital => VrlValue::Bytes(Bytes::from(capitalize(
|
||||
String::from_utf8_lossy(val).as_ref(),
|
||||
))),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -125,16 +125,17 @@ impl Processor for LetterProcessor {
|
||||
self.ignore_missing
|
||||
}
|
||||
|
||||
fn exec_mut(&self, mut val: Value) -> Result<Value> {
|
||||
fn exec_mut(&self, mut val: VrlValue) -> Result<VrlValue> {
|
||||
for field in self.fields.iter() {
|
||||
let index = field.input_field();
|
||||
let val = val.as_object_mut().context(ValueMustBeMapSnafu)?;
|
||||
match val.get(index) {
|
||||
Some(Value::String(s)) => {
|
||||
let result = self.process_field(s)?;
|
||||
Some(VrlValue::Bytes(s)) => {
|
||||
let result = self.process_field(s);
|
||||
let output_key = field.target_or_input_field();
|
||||
val.insert(output_key.to_string(), result)?;
|
||||
val.insert(KeyString::from(output_key), result);
|
||||
}
|
||||
Some(Value::Null) | None => {
|
||||
Some(VrlValue::Null) | None => {
|
||||
if !self.ignore_missing {
|
||||
return ProcessorMissingFieldSnafu {
|
||||
processor: self.kind(),
|
||||
@@ -167,8 +168,10 @@ fn capitalize(s: &str) -> String {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use vrl::prelude::Bytes;
|
||||
use vrl::value::Value as VrlValue;
|
||||
|
||||
use crate::etl::processor::letter::{LetterProcessor, Method};
|
||||
use crate::etl::value::Value;
|
||||
|
||||
#[test]
|
||||
fn test_process() {
|
||||
@@ -177,8 +180,8 @@ mod tests {
|
||||
method: Method::Upper,
|
||||
..Default::default()
|
||||
};
|
||||
let processed = processor.process_field("pipeline").unwrap();
|
||||
assert_eq!(Value::String("PIPELINE".into()), processed)
|
||||
let processed = processor.process_field(&Bytes::from("pipeline"));
|
||||
assert_eq!(VrlValue::Bytes(Bytes::from("PIPELINE")), processed)
|
||||
}
|
||||
|
||||
{
|
||||
@@ -186,8 +189,8 @@ mod tests {
|
||||
method: Method::Lower,
|
||||
..Default::default()
|
||||
};
|
||||
let processed = processor.process_field("Pipeline").unwrap();
|
||||
assert_eq!(Value::String("pipeline".into()), processed)
|
||||
let processed = processor.process_field(&Bytes::from("Pipeline"));
|
||||
assert_eq!(VrlValue::Bytes(Bytes::from("pipeline")), processed)
|
||||
}
|
||||
|
||||
{
|
||||
@@ -195,8 +198,8 @@ mod tests {
|
||||
method: Method::Capital,
|
||||
..Default::default()
|
||||
};
|
||||
let processed = processor.process_field("pipeline").unwrap();
|
||||
assert_eq!(Value::String("Pipeline".into()), processed)
|
||||
let processed = processor.process_field(&Bytes::from("pipeline"));
|
||||
assert_eq!(VrlValue::Bytes(Bytes::from("Pipeline")), processed)
|
||||
}
|
||||
}
|
||||
}
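The letter hunks above now work on the raw Bytes, so upper- and lower-casing is ASCII byte mapping with no UTF-8 round trip (only capitalization still decodes lossily). A minimal sketch covering two of the methods, assuming the vrl crate:

use vrl::prelude::Bytes;
use vrl::value::Value as VrlValue;

enum Method {
    Upper,
    Lower,
}

fn letter(val: &Bytes, method: &Method) -> VrlValue {
    match method {
        // ASCII-only case mapping avoids decoding the bytes into a String.
        Method::Upper => VrlValue::Bytes(Bytes::from(val.to_ascii_uppercase())),
        Method::Lower => VrlValue::Bytes(Bytes::from(val.to_ascii_lowercase())),
    }
}

fn main() {
    assert_eq!(
        letter(&Bytes::from("pipeline"), &Method::Upper),
        VrlValue::Bytes(Bytes::from("PIPELINE"))
    );
}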
@@ -23,18 +23,19 @@ use std::collections::BTreeMap;
|
||||
use lazy_static::lazy_static;
|
||||
use regex::Regex;
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
use vrl::prelude::Bytes;
|
||||
use vrl::value::{KeyString, Value as VrlValue};
|
||||
|
||||
use crate::error::{
|
||||
Error, KeyMustBeStringSnafu, ProcessorExpectStringSnafu, ProcessorMissingFieldSnafu,
|
||||
RegexNamedGroupNotFoundSnafu, RegexNoValidFieldSnafu, RegexNoValidPatternSnafu, RegexSnafu,
|
||||
Result,
|
||||
Result, ValueMustBeMapSnafu,
|
||||
};
|
||||
use crate::etl::field::Fields;
|
||||
use crate::etl::processor::{
|
||||
yaml_bool, yaml_new_field, yaml_new_fields, yaml_string, yaml_strings, Processor, FIELDS_NAME,
|
||||
FIELD_NAME, IGNORE_MISSING_NAME, PATTERN_NAME,
|
||||
};
|
||||
use crate::etl::value::Value;
|
||||
|
||||
lazy_static! {
|
||||
static ref GROUPS_NAME_REGEX: Regex = Regex::new(r"\(\?P?<([[:word:]]+)>.+?\)").unwrap();
|
||||
@@ -168,14 +169,17 @@ impl RegexProcessor {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn process(&self, prefix: &str, val: &str) -> Result<BTreeMap<String, Value>> {
|
||||
fn process(&self, prefix: &str, val: &str) -> Result<BTreeMap<KeyString, VrlValue>> {
|
||||
let mut result = BTreeMap::new();
|
||||
for gr in self.patterns.iter() {
|
||||
if let Some(captures) = gr.regex.captures(val) {
|
||||
for group in gr.groups.iter() {
|
||||
if let Some(capture) = captures.name(group) {
|
||||
let value = capture.as_str().to_string();
|
||||
result.insert(generate_key(prefix, group), Value::String(value));
|
||||
result.insert(
|
||||
KeyString::from(generate_key(prefix, group)),
|
||||
VrlValue::Bytes(Bytes::from(value)),
|
||||
);
|
||||
}
|
||||
}
|
||||
}
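The regex hunks above insert each named capture under a prefix_group key as VrlValue::Bytes. A minimal sketch, assuming the regex and vrl crates; generate_key is replaced by an inline format and extract is an illustrative name:

use std::collections::BTreeMap;

use regex::Regex;
use vrl::prelude::Bytes;
use vrl::value::{KeyString, Value as VrlValue};

fn extract(prefix: &str, val: &str, re: &Regex) -> BTreeMap<KeyString, VrlValue> {
    let mut result = BTreeMap::new();
    if let Some(captures) = re.captures(val) {
        // capture_names() yields every named group declared in the pattern.
        for group in re.capture_names().flatten() {
            if let Some(capture) = captures.name(group) {
                result.insert(
                    KeyString::from(format!("{prefix}_{group}")),
                    VrlValue::Bytes(Bytes::from(capture.as_str().to_string())),
                );
            }
        }
    }
    result
}

fn main() {
    let re = Regex::new(r"(?P<ar>\d)").unwrap();
    let map = extract("a", "123", &re);
    assert_eq!(
        map.get(&KeyString::from("a_ar")),
        Some(&VrlValue::Bytes(Bytes::from("1")))
    );
}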
@@ -193,16 +197,17 @@ impl Processor for RegexProcessor {
|
||||
self.ignore_missing
|
||||
}
|
||||
|
||||
fn exec_mut(&self, mut val: Value) -> Result<Value> {
|
||||
fn exec_mut(&self, mut val: VrlValue) -> Result<VrlValue> {
|
||||
for field in self.fields.iter() {
|
||||
let index = field.input_field();
|
||||
let prefix = field.target_or_input_field();
|
||||
let val = val.as_object_mut().context(ValueMustBeMapSnafu)?;
|
||||
match val.get(index) {
|
||||
Some(Value::String(s)) => {
|
||||
let result = self.process(prefix, s)?;
|
||||
val.extend(result.into())?;
|
||||
Some(VrlValue::Bytes(s)) => {
|
||||
let result = self.process(prefix, String::from_utf8_lossy(s).as_ref())?;
|
||||
val.extend(result);
|
||||
}
|
||||
Some(Value::Null) | None => {
|
||||
Some(VrlValue::Null) | None => {
|
||||
if !self.ignore_missing {
|
||||
return ProcessorMissingFieldSnafu {
|
||||
processor: self.kind(),
|
||||
@@ -226,12 +231,11 @@ impl Processor for RegexProcessor {
|
||||
}
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use ahash::{HashMap, HashMapExt};
|
||||
use itertools::Itertools;
|
||||
use vrl::value::Value as VrlValue;
|
||||
|
||||
use super::*;
|
||||
use crate::etl::processor::regex::RegexProcessor;
|
||||
use crate::etl::value::{Map, Value};
|
||||
|
||||
#[test]
|
||||
fn test_simple_parse() {
|
||||
@@ -250,15 +254,11 @@ ignore_missing: false"#;
|
||||
|
||||
let result = processor.process("a", "123").unwrap();
|
||||
|
||||
let map = Map { values: result };
|
||||
let v = vec![(KeyString::from("a_ar"), VrlValue::Bytes(Bytes::from("1")))]
|
||||
.into_iter()
|
||||
.collect::<BTreeMap<KeyString, VrlValue>>();
|
||||
|
||||
let v = Map {
|
||||
values: vec![("a_ar".to_string(), Value::String("1".to_string()))]
|
||||
.into_iter()
|
||||
.collect(),
|
||||
};
|
||||
|
||||
assert_eq!(v, map);
|
||||
assert_eq!(v, result);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -270,15 +270,30 @@ ignore_missing: false"#;
|
||||
let cw = "[c=w,n=US_CA_SANJOSE,o=55155]";
|
||||
let breadcrumbs_str = [cc, cg, co, cp, cw].iter().join(",");
|
||||
|
||||
let temporary_map: BTreeMap<String, Value> = [
|
||||
("breadcrumbs_parent", Value::String(cc.to_string())),
|
||||
("breadcrumbs_edge", Value::String(cg.to_string())),
|
||||
("breadcrumbs_origin", Value::String(co.to_string())),
|
||||
("breadcrumbs_peer", Value::String(cp.to_string())),
|
||||
("breadcrumbs_wrapper", Value::String(cw.to_string())),
|
||||
let temporary_map: BTreeMap<KeyString, VrlValue> = [
|
||||
(
|
||||
"breadcrumbs_parent",
|
||||
VrlValue::Bytes(Bytes::from(cc.to_string())),
|
||||
),
|
||||
(
|
||||
"breadcrumbs_edge",
|
||||
VrlValue::Bytes(Bytes::from(cg.to_string())),
|
||||
),
|
||||
(
|
||||
"breadcrumbs_origin",
|
||||
VrlValue::Bytes(Bytes::from(co.to_string())),
|
||||
),
|
||||
(
|
||||
"breadcrumbs_peer",
|
||||
VrlValue::Bytes(Bytes::from(cp.to_string())),
|
||||
),
|
||||
(
|
||||
"breadcrumbs_wrapper",
|
||||
VrlValue::Bytes(Bytes::from(cw.to_string())),
|
||||
),
|
||||
]
|
||||
.into_iter()
|
||||
.map(|(k, v)| (k.to_string(), v))
|
||||
.map(|(k, v)| (KeyString::from(k), v))
|
||||
.collect();
|
||||
|
||||
{
|
||||
@@ -331,35 +346,66 @@ ignore_missing: false"#;
|
||||
let processor_yaml_hash = processor_yaml.as_hash().unwrap();
|
||||
let processor = RegexProcessor::try_from(processor_yaml_hash).unwrap();
|
||||
|
||||
let mut result = HashMap::new();
|
||||
let mut result = BTreeMap::new();
|
||||
for field in processor.fields.iter() {
|
||||
let s = temporary_map
|
||||
.get(field.input_field())
|
||||
.unwrap()
|
||||
.to_str_value();
|
||||
let s = temporary_map.get(field.input_field()).unwrap();
|
||||
let s = s.to_string_lossy();
|
||||
let prefix = field.target_or_input_field();
|
||||
|
||||
let r = processor.process(prefix, &s).unwrap();
|
||||
let r = processor.process(prefix, s.as_ref()).unwrap();
|
||||
|
||||
result.extend(r);
|
||||
}
|
||||
|
||||
let new_values = vec![
|
||||
("edge_ip", Value::String("12.34.567.89".to_string())),
|
||||
("edge_request_id", Value::String("12345678".to_string())),
|
||||
("edge_geo", Value::String("US_CA_SANJOSE".to_string())),
|
||||
("edge_asn", Value::String("20940".to_string())),
|
||||
("origin_ip", Value::String("987.654.321.09".to_string())),
|
||||
("peer_asn", Value::String("55155".to_string())),
|
||||
("peer_geo", Value::String("US_CA_SANJOSE".to_string())),
|
||||
("parent_asn", Value::String("55155".to_string())),
|
||||
("parent_geo", Value::String("US_CA_SANJOSE".to_string())),
|
||||
("wrapper_asn", Value::String("55155".to_string())),
|
||||
("wrapper_geo", Value::String("US_CA_SANJOSE".to_string())),
|
||||
(
|
||||
"edge_ip",
|
||||
VrlValue::Bytes(Bytes::from("12.34.567.89".to_string())),
|
||||
),
|
||||
(
|
||||
"edge_request_id",
|
||||
VrlValue::Bytes(Bytes::from("12345678".to_string())),
|
||||
),
|
||||
(
|
||||
"edge_geo",
|
||||
VrlValue::Bytes(Bytes::from("US_CA_SANJOSE".to_string())),
|
||||
),
|
||||
(
|
||||
"edge_asn",
|
||||
VrlValue::Bytes(Bytes::from("20940".to_string())),
|
||||
),
|
||||
(
|
||||
"origin_ip",
|
||||
VrlValue::Bytes(Bytes::from("987.654.321.09".to_string())),
|
||||
),
|
||||
(
|
||||
"peer_asn",
|
||||
VrlValue::Bytes(Bytes::from("55155".to_string())),
|
||||
),
|
||||
(
|
||||
"peer_geo",
|
||||
VrlValue::Bytes(Bytes::from("US_CA_SANJOSE".to_string())),
|
||||
),
|
||||
(
|
||||
"parent_asn",
|
||||
VrlValue::Bytes(Bytes::from("55155".to_string())),
|
||||
),
|
||||
(
|
||||
"parent_geo",
|
||||
VrlValue::Bytes(Bytes::from("US_CA_SANJOSE".to_string())),
|
||||
),
|
||||
(
|
||||
"wrapper_asn",
|
||||
VrlValue::Bytes(Bytes::from("55155".to_string())),
|
||||
),
|
||||
(
|
||||
"wrapper_geo",
|
||||
VrlValue::Bytes(Bytes::from("US_CA_SANJOSE".to_string())),
|
||||
),
|
||||
]
|
||||
.into_iter()
|
||||
.map(|(k, v)| (k.to_string(), v))
|
||||
.collect();
|
||||
.map(|(k, v)| (KeyString::from(k), v))
|
||||
.collect::<BTreeMap<KeyString, VrlValue>>();
|
||||
|
||||
assert_eq!(result, new_values);
|
||||
}
|
||||
|
||||
@@ -14,6 +14,7 @@
|
||||
|
||||
use ahash::{HashSet, HashSetExt};
|
||||
use snafu::OptionExt;
|
||||
use vrl::value::{KeyString, Value as VrlValue};
|
||||
|
||||
use crate::error::{
|
||||
Error, KeyMustBeStringSnafu, ProcessorUnsupportedValueSnafu, Result, ValueMustBeMapSnafu,
|
||||
@@ -22,7 +23,7 @@ use crate::etl::field::Fields;
|
||||
use crate::etl::processor::{
|
||||
yaml_new_field, yaml_new_fields, yaml_string, FIELDS_NAME, FIELD_NAME, TYPE_NAME,
|
||||
};
|
||||
use crate::{Processor, Value};
|
||||
use crate::Processor;
|
||||
|
||||
pub(crate) const PROCESSOR_SELECT: &str = "select";
|
||||
const INCLUDE_KEY: &str = "include";
|
||||
@@ -98,8 +99,8 @@ impl Processor for SelectProcessor {
|
||||
true
|
||||
}
|
||||
|
||||
fn exec_mut(&self, mut val: Value) -> Result<Value> {
|
||||
let v_map = val.as_map_mut().context(ValueMustBeMapSnafu)?;
|
||||
fn exec_mut(&self, mut val: VrlValue) -> Result<VrlValue> {
|
||||
let v_map = val.as_object_mut().context(ValueMustBeMapSnafu)?;
|
||||
|
||||
match self.select_type {
|
||||
SelectType::Include => {
|
||||
@@ -109,7 +110,7 @@ impl Processor for SelectProcessor {
|
||||
let field_name = field.input_field();
|
||||
if let Some(target_name) = field.target_field() {
|
||||
if let Some(v) = v_map.remove(field_name) {
|
||||
v_map.insert(target_name.to_string(), v);
|
||||
v_map.insert(KeyString::from(target_name), v);
|
||||
}
|
||||
include_key_set.insert(target_name);
|
||||
} else {
|
||||
@@ -133,9 +134,12 @@ impl Processor for SelectProcessor {
|
||||
mod test {
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
use vrl::prelude::Bytes;
|
||||
use vrl::value::{KeyString, Value as VrlValue};
|
||||
|
||||
use crate::etl::field::{Field, Fields};
|
||||
use crate::etl::processor::select::{SelectProcessor, SelectType};
|
||||
use crate::{Map, Processor, Value};
|
||||
use crate::Processor;
|
||||
|
||||
#[test]
|
||||
fn test_select() {
|
||||
@@ -145,15 +149,24 @@ mod test {
|
||||
};
|
||||
|
||||
let mut p = BTreeMap::new();
|
||||
p.insert("hello".to_string(), Value::String("world".to_string()));
|
||||
p.insert("hello2".to_string(), Value::String("world2".to_string()));
|
||||
p.insert(
|
||||
KeyString::from("hello"),
|
||||
VrlValue::Bytes(Bytes::from("world".to_string())),
|
||||
);
|
||||
p.insert(
|
||||
KeyString::from("hello2"),
|
||||
VrlValue::Bytes(Bytes::from("world2".to_string())),
|
||||
);
|
||||
|
||||
let result = processor.exec_mut(Value::Map(Map { values: p }));
|
||||
let result = processor.exec_mut(VrlValue::Object(p));
|
||||
assert!(result.is_ok());
|
||||
let mut result = result.unwrap();
|
||||
let p = result.as_map_mut().unwrap();
|
||||
let p = result.as_object_mut().unwrap();
|
||||
assert_eq!(p.len(), 1);
|
||||
assert_eq!(p.get("hello"), Some(&Value::String("world".to_string())));
|
||||
assert_eq!(
|
||||
p.get(&KeyString::from("hello")),
|
||||
Some(&VrlValue::Bytes(Bytes::from("world".to_string())))
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -164,15 +177,24 @@ mod test {
|
||||
};
|
||||
|
||||
let mut p = BTreeMap::new();
|
||||
p.insert("hello".to_string(), Value::String("world".to_string()));
|
||||
p.insert("hello2".to_string(), Value::String("world2".to_string()));
|
||||
p.insert(
|
||||
KeyString::from("hello"),
|
||||
VrlValue::Bytes(Bytes::from("world".to_string())),
|
||||
);
|
||||
p.insert(
|
||||
KeyString::from("hello2"),
|
||||
VrlValue::Bytes(Bytes::from("world2".to_string())),
|
||||
);
|
||||
|
||||
let result = processor.exec_mut(Value::Map(Map { values: p }));
|
||||
let result = processor.exec_mut(VrlValue::Object(p));
|
||||
assert!(result.is_ok());
|
||||
let mut result = result.unwrap();
|
||||
let p = result.as_map_mut().unwrap();
|
||||
let p = result.as_object_mut().unwrap();
|
||||
assert_eq!(p.len(), 1);
|
||||
assert_eq!(p.get("hello3"), Some(&Value::String("world".to_string())));
|
||||
assert_eq!(
|
||||
p.get(&KeyString::from("hello3")),
|
||||
Some(&VrlValue::Bytes(Bytes::from("world".to_string())))
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -183,15 +205,24 @@ mod test {
|
||||
};
|
||||
|
||||
let mut p = BTreeMap::new();
|
||||
p.insert("hello".to_string(), Value::String("world".to_string()));
|
||||
p.insert("hello2".to_string(), Value::String("world2".to_string()));
|
||||
p.insert(
|
||||
KeyString::from("hello"),
|
||||
VrlValue::Bytes(Bytes::from("world".to_string())),
|
||||
);
|
||||
p.insert(
|
||||
KeyString::from("hello2"),
|
||||
VrlValue::Bytes(Bytes::from("world2".to_string())),
|
||||
);
|
||||
|
||||
let result = processor.exec_mut(Value::Map(Map { values: p }));
|
||||
let result = processor.exec_mut(VrlValue::Object(p));
|
||||
assert!(result.is_ok());
|
||||
let mut result = result.unwrap();
|
||||
let p = result.as_map_mut().unwrap();
|
||||
let p = result.as_object_mut().unwrap();
|
||||
assert_eq!(p.len(), 1);
|
||||
assert_eq!(p.get("hello"), None);
|
||||
assert_eq!(p.get("hello2"), Some(&Value::String("world2".to_string())));
|
||||
assert_eq!(p.get(&KeyString::from("hello")), None);
|
||||
assert_eq!(
|
||||
p.get(&KeyString::from("hello2")),
|
||||
Some(&VrlValue::Bytes(Bytes::from("world2".to_string())))
|
||||
);
|
||||
}
|
||||
}
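The three tests above pin down the include/exclude semantics. As a small hedged sketch (helper name is hypothetical), the resulting object after `include: hello` can be checked like this:

use vrl::value::{KeyString, Value as VrlValue};

// Sketch: after `include: hello`, exactly the "hello" key survives; after
// `exclude: hello`, "hello" is gone and "hello2" remains.
fn sketch_only_hello_left(result: &VrlValue) -> bool {
    result
        .as_object()
        .map(|m| m.len() == 1 && m.contains_key(&KeyString::from("hello")))
        .unwrap_or(false)
}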
|
||||
|
||||
@@ -13,14 +13,17 @@
|
||||
// limitations under the License.
|
||||
|
||||
use snafu::OptionExt as _;
|
||||
use vrl::value::{KeyString, Value as VrlValue};
|
||||
|
||||
use crate::error::{Error, KeyMustBeStringSnafu, ProcessorMissingFieldSnafu, Result};
|
||||
use crate::error::{
|
||||
Error, KeyMustBeStringSnafu, ProcessorMissingFieldSnafu, Result, ValueMustBeMapSnafu,
|
||||
};
|
||||
use crate::etl::field::Fields;
|
||||
use crate::etl::processor::{
|
||||
yaml_bool, yaml_new_field, yaml_new_fields, yaml_string, FIELDS_NAME, FIELD_NAME,
|
||||
IGNORE_MISSING_NAME, KEY_NAME,
|
||||
};
|
||||
use crate::{Processor, Value};
|
||||
use crate::Processor;
|
||||
|
||||
pub(crate) const PROCESSOR_SIMPLE_EXTRACT: &str = "simple_extract";
|
||||
|
||||
@@ -74,14 +77,14 @@ impl TryFrom<&yaml_rust::yaml::Hash> for SimpleExtractProcessor {
 }

 impl SimpleExtractProcessor {
-    fn process_field(&self, val: &Value) -> Result<Value> {
+    fn process_field(&self, val: &VrlValue) -> Result<VrlValue> {
         let mut current = val;
         for key in self.key.iter() {
-            let Value::Map(map) = current else {
-                return Ok(Value::Null);
+            let VrlValue::Object(map) = current else {
+                return Ok(VrlValue::Null);
             };
-            let Some(v) = map.get(key) else {
-                return Ok(Value::Null);
+            let Some(v) = map.get(key.as_str()) else {
+                return Ok(VrlValue::Null);
             };
             current = v;
         }
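A hypothetical walk-through of the traversal above (illustrative only): with a key path such as ["user", "id"], process_field descends through nested objects and short-circuits to Null whenever a level is missing or not an object.

use std::collections::BTreeMap;
use vrl::prelude::Bytes;
use vrl::value::{KeyString, Value as VrlValue};

// Sketch input { "user": { "id": "42" } }: the path ["user", "id"] is expected to
// resolve to Bytes("42"), while ["user", "missing"] yields VrlValue::Null.
fn sketch_nested_event() -> VrlValue {
    let mut user = BTreeMap::new();
    user.insert(KeyString::from("id"), VrlValue::Bytes(Bytes::from("42")));
    let mut root = BTreeMap::new();
    root.insert(KeyString::from("user"), VrlValue::Object(user));
    VrlValue::Object(root)
}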
@@ -98,14 +101,15 @@ impl Processor for SimpleExtractProcessor {
|
||||
self.ignore_missing
|
||||
}
|
||||
|
||||
fn exec_mut(&self, mut val: Value) -> Result<Value> {
|
||||
fn exec_mut(&self, mut val: VrlValue) -> Result<VrlValue> {
|
||||
for field in self.fields.iter() {
|
||||
let index = field.input_field();
|
||||
let val = val.as_object_mut().context(ValueMustBeMapSnafu)?;
|
||||
match val.get(index) {
|
||||
Some(v) => {
|
||||
let processed = self.process_field(v)?;
|
||||
let output_index = field.target_or_input_field();
|
||||
val.insert(output_index.to_string(), processed)?;
|
||||
val.insert(KeyString::from(output_index), processed);
|
||||
}
|
||||
None => {
|
||||
if !self.ignore_missing {
|
||||
@@ -124,11 +128,13 @@ impl Processor for SimpleExtractProcessor {
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
use vrl::prelude::Bytes;
|
||||
|
||||
#[test]
|
||||
fn test_simple_extract() {
|
||||
use super::*;
|
||||
use crate::{Map, Value};
|
||||
|
||||
let processor = SimpleExtractProcessor {
|
||||
key: vec!["hello".to_string()],
|
||||
@@ -136,12 +142,12 @@ mod test {
|
||||
};
|
||||
|
||||
let result = processor
|
||||
.process_field(&Value::Map(Map::one(
|
||||
"hello",
|
||||
Value::String("world".to_string()),
|
||||
)))
|
||||
.process_field(&VrlValue::Object(BTreeMap::from([(
|
||||
KeyString::from("hello"),
|
||||
VrlValue::Bytes(Bytes::from("world".to_string())),
|
||||
)])))
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(result, Value::String("world".to_string()));
|
||||
assert_eq!(result, VrlValue::Bytes(Bytes::from("world".to_string())));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -12,19 +12,20 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
use urlencoding::{decode, encode};
|
||||
use snafu::OptionExt;
|
||||
use urlencoding::{decode_binary, encode_binary};
|
||||
use vrl::prelude::Bytes;
|
||||
use vrl::value::{KeyString, Value as VrlValue};
|
||||
|
||||
use crate::error::{
|
||||
Error, KeyMustBeStringSnafu, ProcessorExpectStringSnafu, ProcessorMissingFieldSnafu, Result,
|
||||
UrlEncodingDecodeSnafu, UrlEncodingInvalidMethodSnafu,
|
||||
UrlEncodingInvalidMethodSnafu, ValueMustBeMapSnafu,
|
||||
};
|
||||
use crate::etl::field::Fields;
|
||||
use crate::etl::processor::{
|
||||
yaml_bool, yaml_new_field, yaml_new_fields, yaml_string, FIELDS_NAME, FIELD_NAME,
|
||||
IGNORE_MISSING_NAME, METHOD_NAME,
|
||||
};
|
||||
use crate::etl::value::Value;
|
||||
|
||||
pub(crate) const PROCESSOR_URL_ENCODING: &str = "urlencoding";
|
||||
|
||||
@@ -65,12 +66,12 @@ pub struct UrlEncodingProcessor {
 }

 impl UrlEncodingProcessor {
-    fn process_field(&self, val: &str) -> Result<Value> {
+    fn process_field(&self, val: &Bytes) -> Result<VrlValue> {
         let processed = match self.method {
-            Method::Encode => encode(val).to_string(),
-            Method::Decode => decode(val).context(UrlEncodingDecodeSnafu)?.into_owned(),
+            Method::Encode => Bytes::from_iter(encode_binary(val).bytes()),
+            Method::Decode => Bytes::from(decode_binary(val).to_vec()),
         };
-        Ok(Value::String(processed))
+        Ok(VrlValue::Bytes(processed))
     }
 }

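A short sketch of the byte-level behaviour, using the same urlencoding calls the patch switches to:

use urlencoding::{decode_binary, encode_binary};
use vrl::prelude::Bytes;

// Sketch: "hello world" percent-encodes to "hello%20world"; decoding restores the original bytes.
fn sketch_urlencoding_roundtrip() {
    let raw = Bytes::from("hello world");
    let encoded = Bytes::from_iter(encode_binary(&raw).bytes());
    assert_eq!(encoded, Bytes::from("hello%20world"));
    let decoded = Bytes::from(decode_binary(&encoded).to_vec());
    assert_eq!(decoded, raw);
}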
@@ -125,16 +126,17 @@ impl crate::etl::processor::Processor for UrlEncodingProcessor {
|
||||
self.ignore_missing
|
||||
}
|
||||
|
||||
fn exec_mut(&self, mut val: Value) -> Result<Value> {
|
||||
fn exec_mut(&self, mut val: VrlValue) -> Result<VrlValue> {
|
||||
for field in self.fields.iter() {
|
||||
let index = field.input_field();
|
||||
let val = val.as_object_mut().context(ValueMustBeMapSnafu)?;
|
||||
match val.get(index) {
|
||||
Some(Value::String(s)) => {
|
||||
Some(VrlValue::Bytes(s)) => {
|
||||
let result = self.process_field(s)?;
|
||||
let output_index = field.target_or_input_field();
|
||||
val.insert(output_index.to_string(), result)?;
|
||||
val.insert(KeyString::from(output_index), result);
|
||||
}
|
||||
Some(Value::Null) | None => {
|
||||
Some(VrlValue::Null) | None => {
|
||||
if !self.ignore_missing {
|
||||
return ProcessorMissingFieldSnafu {
|
||||
processor: self.kind(),
|
||||
@@ -159,9 +161,11 @@ impl crate::etl::processor::Processor for UrlEncodingProcessor {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
|
||||
use vrl::prelude::Bytes;
|
||||
use vrl::value::Value as VrlValue;
|
||||
|
||||
use crate::etl::field::Fields;
|
||||
use crate::etl::processor::urlencoding::UrlEncodingProcessor;
|
||||
use crate::etl::value::Value;
|
||||
|
||||
#[test]
|
||||
fn test_decode_url() {
|
||||
@@ -170,8 +174,8 @@ mod tests {
|
||||
|
||||
{
|
||||
let processor = UrlEncodingProcessor::default();
|
||||
let result = processor.process_field(encoded).unwrap();
|
||||
assert_eq!(Value::String(decoded.into()), result)
|
||||
let result = processor.process_field(&Bytes::from(encoded)).unwrap();
|
||||
assert_eq!(VrlValue::Bytes(Bytes::from(decoded)), result)
|
||||
}
|
||||
{
|
||||
let processor = UrlEncodingProcessor {
|
||||
@@ -179,8 +183,8 @@ mod tests {
|
||||
method: super::Method::Encode,
|
||||
ignore_missing: false,
|
||||
};
|
||||
let result = processor.process_field(decoded).unwrap();
|
||||
assert_eq!(Value::String(encoded.into()), result)
|
||||
let result = processor.process_field(&Bytes::from(decoded)).unwrap();
|
||||
assert_eq!(VrlValue::Bytes(Bytes::from(encoded)), result)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -15,19 +15,18 @@
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
use chrono_tz::Tz;
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
use snafu::OptionExt;
|
||||
use vrl::compiler::runtime::Runtime;
|
||||
use vrl::compiler::{compile, Program, TargetValue};
|
||||
use vrl::diagnostic::Formatter;
|
||||
use vrl::prelude::{Bytes, NotNan, TimeZone};
|
||||
use vrl::value::{KeyString, Kind, Secrets, Value as VrlValue};
|
||||
use vrl::prelude::TimeZone;
|
||||
use vrl::value::{Kind, Secrets, Value as VrlValue};
|
||||
|
||||
use crate::error::{
|
||||
BytesToUtf8Snafu, CompileVrlSnafu, Error, ExecuteVrlSnafu, FloatNaNSnafu,
|
||||
InvalidTimestampSnafu, KeyMustBeStringSnafu, Result, VrlRegexValueSnafu, VrlReturnValueSnafu,
|
||||
CompileVrlSnafu, Error, ExecuteVrlSnafu, KeyMustBeStringSnafu, Result, VrlRegexValueSnafu,
|
||||
VrlReturnValueSnafu,
|
||||
};
|
||||
use crate::etl::processor::yaml_string;
|
||||
use crate::Value as PipelineValue;
|
||||
|
||||
pub(crate) const PROCESSOR_VRL: &str = "vrl";
|
||||
const SOURCE: &str = "source";
|
||||
@@ -62,11 +61,9 @@ impl VrlProcessor {
         Ok(Self { source, program })
     }

-    pub fn resolve(&self, m: PipelineValue) -> Result<PipelineValue> {
-        let pipeline_vrl = pipeline_value_to_vrl_value(m)?;
-
+    pub fn resolve(&self, value: VrlValue) -> Result<VrlValue> {
         let mut target = TargetValue {
-            value: pipeline_vrl,
+            value,
             metadata: VrlValue::Object(BTreeMap::new()),
             secrets: Secrets::default(),
         };
@@ -82,7 +79,7 @@ impl VrlProcessor {
                 .build()
             })?;

-        vrl_value_to_pipeline_value(re)
+        Ok(re)
     }
 }

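A brief note on the contract this sets up (sketch, helper name hypothetical): resolve() now hands the VRL runtime's output straight back as a VrlValue, and the processor only accepts it when it is an Object.

use vrl::value::Value as VrlValue;

// Sketch of the check exec_mut (next hunk) builds on: anything other than an
// Object is rejected with VrlRegexValueSnafu.
fn sketch_resolve_output_ok(resolved: &VrlValue) -> bool {
    matches!(resolved, VrlValue::Object(_))
}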
@@ -113,91 +110,17 @@ impl crate::etl::processor::Processor for VrlProcessor {
|
||||
true
|
||||
}
|
||||
|
||||
fn exec_mut(&self, val: PipelineValue) -> Result<PipelineValue> {
|
||||
fn exec_mut(&self, val: VrlValue) -> Result<VrlValue> {
|
||||
let val = self.resolve(val)?;
|
||||
|
||||
if let PipelineValue::Map(m) = val {
|
||||
Ok(PipelineValue::Map(m.values.into()))
|
||||
if let VrlValue::Object(_) = val {
|
||||
Ok(val)
|
||||
} else {
|
||||
VrlRegexValueSnafu.fail()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn pipeline_value_to_vrl_value(v: PipelineValue) -> Result<VrlValue> {
|
||||
match v {
|
||||
PipelineValue::Null => Ok(VrlValue::Null),
|
||||
PipelineValue::Int8(x) => Ok(VrlValue::Integer(x as i64)),
|
||||
PipelineValue::Int16(x) => Ok(VrlValue::Integer(x as i64)),
|
||||
PipelineValue::Int32(x) => Ok(VrlValue::Integer(x as i64)),
|
||||
PipelineValue::Int64(x) => Ok(VrlValue::Integer(x)),
|
||||
PipelineValue::Uint8(x) => Ok(VrlValue::Integer(x as i64)),
|
||||
PipelineValue::Uint16(x) => Ok(VrlValue::Integer(x as i64)),
|
||||
PipelineValue::Uint32(x) => Ok(VrlValue::Integer(x as i64)),
|
||||
PipelineValue::Uint64(x) => Ok(VrlValue::Integer(x as i64)),
|
||||
PipelineValue::Float32(x) => NotNan::new(x as f64)
|
||||
.map_err(|_| FloatNaNSnafu { input_float: x }.build())
|
||||
.map(VrlValue::Float),
|
||||
PipelineValue::Float64(x) => NotNan::new(x)
|
||||
.map_err(|_| FloatNaNSnafu { input_float: x }.build())
|
||||
.map(VrlValue::Float),
|
||||
PipelineValue::Boolean(x) => Ok(VrlValue::Boolean(x)),
|
||||
PipelineValue::String(x) => Ok(VrlValue::Bytes(Bytes::copy_from_slice(x.as_bytes()))),
|
||||
PipelineValue::Timestamp(x) => x
|
||||
.to_datetime()
|
||||
.context(InvalidTimestampSnafu {
|
||||
input: x.to_string(),
|
||||
})
|
||||
.map(VrlValue::Timestamp),
|
||||
PipelineValue::Array(array) => Ok(VrlValue::Array(
|
||||
array
|
||||
.into_iter()
|
||||
.map(pipeline_value_to_vrl_value)
|
||||
.collect::<Result<Vec<_>>>()?,
|
||||
)),
|
||||
PipelineValue::Map(m) => {
|
||||
let values = m
|
||||
.values
|
||||
.into_iter()
|
||||
.map(|(k, v)| pipeline_value_to_vrl_value(v).map(|v| (KeyString::from(k), v)))
|
||||
.collect::<Result<BTreeMap<_, _>>>()?;
|
||||
Ok(VrlValue::Object(values))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn vrl_value_to_pipeline_value(v: VrlValue) -> Result<PipelineValue> {
|
||||
match v {
|
||||
VrlValue::Bytes(bytes) => String::from_utf8(bytes.to_vec())
|
||||
.context(BytesToUtf8Snafu)
|
||||
.map(PipelineValue::String),
|
||||
VrlValue::Regex(_) => VrlRegexValueSnafu.fail(),
|
||||
VrlValue::Integer(x) => Ok(PipelineValue::Int64(x)),
|
||||
VrlValue::Float(not_nan) => Ok(PipelineValue::Float64(not_nan.into_inner())),
|
||||
VrlValue::Boolean(b) => Ok(PipelineValue::Boolean(b)),
|
||||
VrlValue::Timestamp(date_time) => crate::etl::value::Timestamp::from_datetime(date_time)
|
||||
.context(InvalidTimestampSnafu {
|
||||
input: date_time.to_string(),
|
||||
})
|
||||
.map(PipelineValue::Timestamp),
|
||||
VrlValue::Object(bm) => {
|
||||
let b = bm
|
||||
.into_iter()
|
||||
.map(|(k, v)| vrl_value_to_pipeline_value(v).map(|v| (k.to_string(), v)))
|
||||
.collect::<Result<BTreeMap<String, PipelineValue>>>()?;
|
||||
Ok(PipelineValue::Map(b.into()))
|
||||
}
|
||||
VrlValue::Array(values) => {
|
||||
let a = values
|
||||
.into_iter()
|
||||
.map(vrl_value_to_pipeline_value)
|
||||
.collect::<Result<Vec<_>>>()?;
|
||||
Ok(PipelineValue::Array(a.into()))
|
||||
}
|
||||
VrlValue::Null => Ok(PipelineValue::Null),
|
||||
}
|
||||
}
|
||||
|
||||
fn check_regex_output(output_kind: &Kind) -> Result<()> {
|
||||
if output_kind.is_regex() {
|
||||
return VrlRegexValueSnafu.fail();
|
||||
@@ -223,9 +146,10 @@ fn check_regex_output(output_kind: &Kind) -> Result<()> {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
|
||||
use vrl::prelude::Bytes;
|
||||
use vrl::value::KeyString;
|
||||
|
||||
use super::*;
|
||||
use crate::etl::value::Timestamp;
|
||||
use crate::Map;
|
||||
|
||||
#[test]
|
||||
fn test_vrl() {
|
||||
@@ -243,31 +167,27 @@ del(.user_info)
|
||||
|
||||
let mut n = BTreeMap::new();
|
||||
n.insert(
|
||||
"name".to_string(),
|
||||
PipelineValue::String("certain_name".to_string()),
|
||||
KeyString::from("name"),
|
||||
VrlValue::Bytes(Bytes::from("certain_name")),
|
||||
);
|
||||
|
||||
let mut m = BTreeMap::new();
|
||||
m.insert(
|
||||
"user_info".to_string(),
|
||||
PipelineValue::Map(Map { values: n }),
|
||||
);
|
||||
m.insert(KeyString::from("user_info"), VrlValue::Object(n));
|
||||
|
||||
let re = v.resolve(PipelineValue::Map(Map { values: m }));
|
||||
let re = v.resolve(VrlValue::Object(m));
|
||||
assert!(re.is_ok());
|
||||
let re = re.unwrap();
|
||||
|
||||
assert!(matches!(re, PipelineValue::Map(_)));
|
||||
assert!(matches!(re, VrlValue::Object(_)));
|
||||
let re = re.as_object().unwrap();
|
||||
assert!(re.get("name").is_some());
|
||||
let name = re.get("name").unwrap();
|
||||
assert!(matches!(name.get("a").unwrap(), PipelineValue::String(x) if x == "certain_name"));
|
||||
assert!(matches!(name.get("b").unwrap(), PipelineValue::String(x) if x == "certain_name"));
|
||||
let name = name.as_object().unwrap();
|
||||
assert!(matches!(name.get("a").unwrap(), VrlValue::Bytes(x) if x == "certain_name"));
|
||||
assert!(matches!(name.get("b").unwrap(), VrlValue::Bytes(x) if x == "certain_name"));
|
||||
assert!(re.get("timestamp").is_some());
|
||||
let timestamp = re.get("timestamp").unwrap();
|
||||
assert!(matches!(
|
||||
timestamp,
|
||||
PipelineValue::Timestamp(Timestamp::Nanosecond(_))
|
||||
));
|
||||
assert!(matches!(timestamp, VrlValue::Timestamp(_)));
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -15,16 +15,20 @@
|
||||
pub mod index;
|
||||
pub mod transformer;
|
||||
|
||||
use api::v1::value::ValueData;
|
||||
use api::v1::ColumnDataType;
|
||||
use chrono::Utc;
|
||||
use snafu::{ensure, OptionExt};
|
||||
|
||||
use crate::error::{
|
||||
Error, KeyMustBeStringSnafu, Result, TransformElementMustBeMapSnafu,
|
||||
TransformFieldMustBeSetSnafu, TransformOnFailureInvalidValueSnafu, TransformTypeMustBeSetSnafu,
|
||||
UnsupportedTypeInPipelineSnafu,
|
||||
};
|
||||
use crate::etl::field::Fields;
|
||||
use crate::etl::processor::{yaml_bool, yaml_new_field, yaml_new_fields, yaml_string};
|
||||
use crate::etl::transform::index::Index;
|
||||
use crate::etl::value::{Timestamp, Value};
|
||||
use crate::etl::value::{parse_str_type, parse_str_value};
|
||||
|
||||
const TRANSFORM_FIELD: &str = "field";
|
||||
const TRANSFORM_FIELDS: &str = "fields";
|
||||
@@ -124,39 +128,61 @@ impl TryFrom<&Vec<yaml_rust::Yaml>> for Transforms {
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Transform {
|
||||
pub fields: Fields,
|
||||
pub type_: Value,
|
||||
pub default: Option<Value>,
|
||||
pub type_: ColumnDataType,
|
||||
pub default: Option<ValueData>,
|
||||
pub index: Option<Index>,
|
||||
pub tag: bool,
|
||||
pub on_failure: Option<OnFailure>,
|
||||
}
|
||||
|
||||
impl Default for Transform {
|
||||
fn default() -> Self {
|
||||
Transform {
|
||||
fields: Fields::default(),
|
||||
type_: Value::Null,
|
||||
default: None,
|
||||
index: None,
|
||||
tag: false,
|
||||
on_failure: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
// valid types
|
||||
// ColumnDataType::Int8
|
||||
// ColumnDataType::Int16
|
||||
// ColumnDataType::Int32
|
||||
// ColumnDataType::Int64
|
||||
// ColumnDataType::Uint8
|
||||
// ColumnDataType::Uint16
|
||||
// ColumnDataType::Uint32
|
||||
// ColumnDataType::Uint64
|
||||
// ColumnDataType::Float32
|
||||
// ColumnDataType::Float64
|
||||
// ColumnDataType::Boolean
|
||||
// ColumnDataType::String
|
||||
// ColumnDataType::TimestampNanosecond
|
||||
// ColumnDataType::TimestampMicrosecond
|
||||
// ColumnDataType::TimestampMillisecond
|
||||
// ColumnDataType::TimestampSecond
|
||||
// ColumnDataType::Binary
|
||||
|
||||
impl Transform {
|
||||
pub(crate) fn get_default(&self) -> Option<&Value> {
|
||||
pub(crate) fn get_default(&self) -> Option<&ValueData> {
|
||||
self.default.as_ref()
|
||||
}
|
||||
|
||||
pub(crate) fn get_type_matched_default_val(&self) -> &Value {
|
||||
&self.type_
|
||||
pub(crate) fn get_type_matched_default_val(&self) -> Result<ValueData> {
|
||||
get_default_for_type(&self.type_)
|
||||
}
|
||||
|
||||
pub(crate) fn get_default_value_when_data_is_none(&self) -> Option<Value> {
|
||||
if matches!(self.type_, Value::Timestamp(_)) && self.index.is_some_and(|i| i == Index::Time)
|
||||
{
|
||||
return Some(Value::Timestamp(Timestamp::default()));
|
||||
pub(crate) fn get_default_value_when_data_is_none(&self) -> Option<ValueData> {
|
||||
if is_timestamp_type(&self.type_) && self.index.is_some_and(|i| i == Index::Time) {
|
||||
let now = Utc::now();
|
||||
match self.type_ {
|
||||
ColumnDataType::TimestampSecond => {
|
||||
return Some(ValueData::TimestampSecondValue(now.timestamp()));
|
||||
}
|
||||
ColumnDataType::TimestampMillisecond => {
|
||||
return Some(ValueData::TimestampMillisecondValue(now.timestamp_millis()));
|
||||
}
|
||||
ColumnDataType::TimestampMicrosecond => {
|
||||
return Some(ValueData::TimestampMicrosecondValue(now.timestamp_micros()));
|
||||
}
|
||||
ColumnDataType::TimestampNanosecond => {
|
||||
return Some(ValueData::TimestampNanosecondValue(
|
||||
now.timestamp_nanos_opt()?,
|
||||
));
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
@@ -166,17 +192,57 @@ impl Transform {
     }
 }

+fn is_timestamp_type(ty: &ColumnDataType) -> bool {
+    matches!(
+        ty,
+        ColumnDataType::TimestampSecond
+            | ColumnDataType::TimestampMillisecond
+            | ColumnDataType::TimestampMicrosecond
+            | ColumnDataType::TimestampNanosecond
+    )
+}
+
+fn get_default_for_type(ty: &ColumnDataType) -> Result<ValueData> {
+    let v = match ty {
+        ColumnDataType::Boolean => ValueData::BoolValue(false),
+        ColumnDataType::Int8 => ValueData::I8Value(0),
+        ColumnDataType::Int16 => ValueData::I16Value(0),
+        ColumnDataType::Int32 => ValueData::I32Value(0),
+        ColumnDataType::Int64 => ValueData::I64Value(0),
+        ColumnDataType::Uint8 => ValueData::U8Value(0),
+        ColumnDataType::Uint16 => ValueData::U16Value(0),
+        ColumnDataType::Uint32 => ValueData::U32Value(0),
+        ColumnDataType::Uint64 => ValueData::U64Value(0),
+        ColumnDataType::Float32 => ValueData::F32Value(0.0),
+        ColumnDataType::Float64 => ValueData::F64Value(0.0),
+        ColumnDataType::Binary => ValueData::BinaryValue(jsonb::Value::Null.to_vec()),
+        ColumnDataType::String => ValueData::StringValue(String::new()),
+
+        ColumnDataType::TimestampSecond => ValueData::TimestampSecondValue(0),
+        ColumnDataType::TimestampMillisecond => ValueData::TimestampMillisecondValue(0),
+        ColumnDataType::TimestampMicrosecond => ValueData::TimestampMicrosecondValue(0),
+        ColumnDataType::TimestampNanosecond => ValueData::TimestampNanosecondValue(0),
+
+        _ => UnsupportedTypeInPipelineSnafu {
+            ty: ty.as_str_name(),
+        }
+        .fail()?,
+    };
+    Ok(v)
+}

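To make the table of defaults above concrete, a small illustrative check (not part of the patch):

// Sketch of a few defaults produced by get_default_for_type:
//   Int64                -> ValueData::I64Value(0)
//   String               -> ValueData::StringValue("")
//   TimestampMillisecond -> ValueData::TimestampMillisecondValue(0)
fn sketch_int64_default_is_zero() -> bool {
    matches!(
        get_default_for_type(&ColumnDataType::Int64),
        Ok(ValueData::I64Value(0))
    )
}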
impl TryFrom<&yaml_rust::yaml::Hash> for Transform {
|
||||
type Error = Error;
|
||||
|
||||
fn try_from(hash: &yaml_rust::yaml::Hash) -> Result<Self> {
|
||||
let mut fields = Fields::default();
|
||||
let mut type_ = Value::Null;
|
||||
let mut default = None;
|
||||
let mut index = None;
|
||||
let mut tag = false;
|
||||
let mut on_failure = None;
|
||||
|
||||
let mut type_ = None;
|
||||
|
||||
for (k, v) in hash {
|
||||
let key = k
|
||||
.as_str()
|
||||
@@ -192,7 +258,7 @@ impl TryFrom<&yaml_rust::yaml::Hash> for Transform {
|
||||
|
||||
TRANSFORM_TYPE => {
|
||||
let t = yaml_string(v, TRANSFORM_TYPE)?;
|
||||
type_ = Value::parse_str_type(&t)?;
|
||||
type_ = Some(parse_str_type(&t)?);
|
||||
}
|
||||
|
||||
TRANSFORM_INDEX => {
|
||||
@@ -205,7 +271,17 @@ impl TryFrom<&yaml_rust::yaml::Hash> for Transform {
|
||||
}
|
||||
|
||||
TRANSFORM_DEFAULT => {
|
||||
default = Some(Value::try_from(v)?);
|
||||
default = match v {
|
||||
yaml_rust::Yaml::Real(r) => Some(r.clone()),
|
||||
yaml_rust::Yaml::Integer(i) => Some(i.to_string()),
|
||||
yaml_rust::Yaml::String(s) => Some(s.clone()),
|
||||
yaml_rust::Yaml::Boolean(b) => Some(b.to_string()),
|
||||
yaml_rust::Yaml::Array(_)
|
||||
| yaml_rust::Yaml::Hash(_)
|
||||
| yaml_rust::Yaml::Alias(_)
|
||||
| yaml_rust::Yaml::Null
|
||||
| yaml_rust::Yaml::BadValue => None,
|
||||
};
|
||||
}
|
||||
|
||||
TRANSFORM_ON_FAILURE => {
|
||||
@@ -219,23 +295,14 @@ impl TryFrom<&yaml_rust::yaml::Hash> for Transform {
|
||||
|
||||
// ensure fields and type
|
||||
ensure!(!fields.is_empty(), TransformFieldMustBeSetSnafu);
|
||||
ensure!(
|
||||
type_ != Value::Null,
|
||||
TransformTypeMustBeSetSnafu {
|
||||
fields: format!("{:?}", fields)
|
||||
}
|
||||
);
|
||||
let type_ = type_.context(TransformTypeMustBeSetSnafu {
|
||||
fields: format!("{:?}", fields),
|
||||
})?;
|
||||
|
||||
let final_default = if let Some(default_value) = default {
|
||||
match default_value {
|
||||
// if default is not set, then it will be regarded as default null
|
||||
Value::Null => None,
|
||||
_ => {
|
||||
let target = type_.parse_str_value(default_value.to_str_value().as_str())?;
|
||||
on_failure = Some(OnFailure::Default);
|
||||
Some(target)
|
||||
}
|
||||
}
|
||||
let target = parse_str_value(&type_, &default_value)?;
|
||||
on_failure = Some(OnFailure::Default);
|
||||
Some(target)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
@@ -14,6 +14,7 @@
|
||||
|
||||
pub mod coerce;
|
||||
|
||||
use std::borrow::Cow;
|
||||
use std::collections::{BTreeMap, HashSet};
|
||||
use std::sync::Arc;
|
||||
|
||||
@@ -24,26 +25,27 @@ use api::v1::value::ValueData;
|
||||
use api::v1::{ColumnDataType, ColumnDataTypeExtension, JsonTypeExtension, SemanticType};
|
||||
use coerce::{coerce_columns, coerce_value};
|
||||
use common_query::prelude::{GREPTIME_TIMESTAMP, GREPTIME_VALUE};
|
||||
use common_telemetry::warn;
|
||||
use greptime_proto::v1::{ColumnSchema, Row, Rows, Value as GreptimeValue};
|
||||
use itertools::Itertools;
|
||||
use jsonb::Number;
|
||||
use once_cell::sync::OnceCell;
|
||||
use serde_json::Number;
|
||||
use session::context::Channel;
|
||||
use snafu::OptionExt;
|
||||
use vrl::prelude::VrlValueConvert;
|
||||
use vrl::value::{KeyString, Value as VrlValue};
|
||||
|
||||
use crate::error::{
|
||||
IdentifyPipelineColumnTypeMismatchSnafu, ReachedMaxNestedLevelsSnafu, Result,
|
||||
TimeIndexMustBeNonNullSnafu, TransformColumnNameMustBeUniqueSnafu,
|
||||
TransformMultipleTimestampIndexSnafu, TransformTimestampIndexCountSnafu,
|
||||
UnsupportedNumberTypeSnafu, ValueMustBeMapSnafu,
|
||||
IdentifyPipelineColumnTypeMismatchSnafu, InvalidTimestampSnafu, ReachedMaxNestedLevelsSnafu,
|
||||
Result, TimeIndexMustBeNonNullSnafu, TransformColumnNameMustBeUniqueSnafu,
|
||||
TransformMultipleTimestampIndexSnafu, TransformTimestampIndexCountSnafu, ValueMustBeMapSnafu,
|
||||
};
|
||||
use crate::etl::ctx_req::ContextOpt;
|
||||
use crate::etl::field::{Field, Fields};
|
||||
use crate::etl::transform::index::Index;
|
||||
use crate::etl::transform::{Transform, Transforms};
|
||||
use crate::etl::value::{Timestamp, Value};
|
||||
use crate::etl::PipelineDocVersion;
|
||||
use crate::{unwrap_or_continue_if_err, Map, PipelineContext};
|
||||
use crate::{unwrap_or_continue_if_err, PipelineContext};
|
||||
|
||||
const DEFAULT_GREPTIME_TIMESTAMP_COLUMN: &str = "greptime_timestamp";
|
||||
const DEFAULT_MAX_NESTED_LEVELS_FOR_JSON_FLATTENING: usize = 10;
|
||||
@@ -133,7 +135,7 @@ impl GreptimePipelineParams {
|
||||
impl GreptimeTransformer {
|
||||
/// Add a default timestamp column to the transforms
|
||||
fn add_greptime_timestamp_column(transforms: &mut Transforms) {
|
||||
let type_ = Value::Timestamp(Timestamp::Nanosecond(0));
|
||||
let type_ = ColumnDataType::TimestampNanosecond;
|
||||
let default = None;
|
||||
|
||||
let transform = Transform {
|
||||
@@ -220,7 +222,7 @@ impl GreptimeTransformer {
|
||||
|
||||
pub fn transform_mut(
|
||||
&self,
|
||||
pipeline_map: &mut Value,
|
||||
pipeline_map: &mut VrlValue,
|
||||
is_v1: bool,
|
||||
) -> Result<Vec<GreptimeValue>> {
|
||||
let mut values = vec![GreptimeValue { value_data: None }; self.schema.len()];
|
||||
@@ -229,6 +231,7 @@ impl GreptimeTransformer {
|
||||
for field in transform.fields.iter() {
|
||||
let column_name = field.input_field();
|
||||
|
||||
let pipeline_map = pipeline_map.as_object_mut().context(ValueMustBeMapSnafu)?;
|
||||
// let us keep `get` here to stay compatible with v1
|
||||
match pipeline_map.get(column_name) {
|
||||
Some(v) => {
|
||||
@@ -240,11 +243,8 @@ impl GreptimeTransformer {
|
||||
let value_data = match transform.on_failure {
|
||||
Some(crate::etl::transform::OnFailure::Default) => {
|
||||
match transform.get_default() {
|
||||
Some(default) => coerce_value(default, transform)?,
|
||||
None => match transform.get_default_value_when_data_is_none() {
|
||||
Some(default) => coerce_value(&default, transform)?,
|
||||
None => None,
|
||||
},
|
||||
Some(default) => Some(default.clone()),
|
||||
None => transform.get_default_value_when_data_is_none(),
|
||||
}
|
||||
}
|
||||
Some(crate::etl::transform::OnFailure::Ignore) => None,
|
||||
@@ -349,63 +349,22 @@ fn resolve_schema(
|
||||
}
|
||||
}
|
||||
|
||||
fn resolve_number_schema(
|
||||
n: Number,
|
||||
column_name: String,
|
||||
index: Option<usize>,
|
||||
row: &mut Vec<GreptimeValue>,
|
||||
schema_info: &mut SchemaInfo,
|
||||
) -> Result<()> {
|
||||
let (value, datatype, semantic_type) = if n.is_i64() {
|
||||
(
|
||||
ValueData::I64Value(n.as_i64().unwrap()),
|
||||
ColumnDataType::Int64 as i32,
|
||||
SemanticType::Field as i32,
|
||||
)
|
||||
} else if n.is_u64() {
|
||||
(
|
||||
ValueData::U64Value(n.as_u64().unwrap()),
|
||||
ColumnDataType::Uint64 as i32,
|
||||
SemanticType::Field as i32,
|
||||
)
|
||||
} else if n.is_f64() {
|
||||
(
|
||||
ValueData::F64Value(n.as_f64().unwrap()),
|
||||
ColumnDataType::Float64 as i32,
|
||||
SemanticType::Field as i32,
|
||||
)
|
||||
} else {
|
||||
return UnsupportedNumberTypeSnafu { value: n }.fail();
|
||||
};
|
||||
resolve_schema(
|
||||
index,
|
||||
value,
|
||||
ColumnSchema {
|
||||
column_name,
|
||||
datatype,
|
||||
semantic_type,
|
||||
datatype_extension: None,
|
||||
options: None,
|
||||
},
|
||||
row,
|
||||
schema_info,
|
||||
)
|
||||
}
|
||||
|
||||
-fn calc_ts(p_ctx: &PipelineContext, values: &Value) -> Result<Option<ValueData>> {
+fn calc_ts(p_ctx: &PipelineContext, values: &VrlValue) -> Result<Option<ValueData>> {
     match p_ctx.channel {
-        Channel::Prometheus => Ok(Some(ValueData::TimestampMillisecondValue(
-            values
-                .get(GREPTIME_TIMESTAMP)
-                .and_then(|v| v.as_i64())
-                .unwrap_or_default(),
-        ))),
+        Channel::Prometheus => {
+            let ts = values
+                .as_object()
+                .and_then(|m| m.get(GREPTIME_TIMESTAMP))
+                .and_then(|ts| ts.try_into_i64().ok())
+                .unwrap_or_default();
+            Ok(Some(ValueData::TimestampMillisecondValue(ts)))
+        }
         _ => {
             let custom_ts = p_ctx.pipeline_definition.get_custom_ts();
             match custom_ts {
                 Some(ts) => {
-                    let ts_field = values.get(ts.get_column_name());
-                    Some(ts.get_timestamp(ts_field)).transpose()
+                    let ts_field = values.as_object().and_then(|m| m.get(ts.get_column_name()));
+                    Some(ts.get_timestamp_value(ts_field)).transpose()
                 }
                 None => Ok(Some(ValueData::TimestampNanosecondValue(
                     chrono::Utc::now().timestamp_nanos_opt().unwrap_or_default(),
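Spelling out the Prometheus branch with a concrete case (illustrative only): the timestamp is read from the greptime_timestamp field as milliseconds, and a missing or non-integer value falls back to 0 via unwrap_or_default.

use std::collections::BTreeMap;
use vrl::value::{KeyString, Value as VrlValue};

// Sketch input: { "greptime_timestamp": 1_700_000_000_123 } is expected to become
// ValueData::TimestampMillisecondValue(1_700_000_000_123) on the Prometheus channel.
fn sketch_prometheus_event() -> VrlValue {
    let mut event = BTreeMap::new();
    event.insert(
        KeyString::from("greptime_timestamp"),
        VrlValue::Integer(1_700_000_000_123),
    );
    VrlValue::Object(event)
}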
@@ -417,18 +376,20 @@ fn calc_ts(p_ctx: &PipelineContext, values: &Value) -> Result<Option<ValueData>>
|
||||
|
||||
pub(crate) fn values_to_row(
|
||||
schema_info: &mut SchemaInfo,
|
||||
values: Value,
|
||||
values: VrlValue,
|
||||
pipeline_ctx: &PipelineContext<'_>,
|
||||
row: Option<Vec<GreptimeValue>>,
|
||||
need_calc_ts: bool,
|
||||
) -> Result<Row> {
|
||||
let mut row: Vec<GreptimeValue> =
|
||||
row.unwrap_or_else(|| Vec::with_capacity(schema_info.schema.len()));
|
||||
let custom_ts = pipeline_ctx.pipeline_definition.get_custom_ts();
|
||||
|
||||
// calculate timestamp value based on the channel
|
||||
let ts = calc_ts(pipeline_ctx, &values)?;
|
||||
|
||||
row.push(GreptimeValue { value_data: ts });
|
||||
if need_calc_ts {
|
||||
// calculate timestamp value based on the channel
|
||||
let ts = calc_ts(pipeline_ctx, &values)?;
|
||||
row.push(GreptimeValue { value_data: ts });
|
||||
}
|
||||
|
||||
row.resize(schema_info.schema.len(), GreptimeValue { value_data: None });
|
||||
|
||||
@@ -437,14 +398,20 @@ pub(crate) fn values_to_row(
|
||||
.as_ref()
|
||||
.map_or(DEFAULT_GREPTIME_TIMESTAMP_COLUMN, |ts| ts.get_column_name());
|
||||
|
||||
let values = values.into_map().context(ValueMustBeMapSnafu)?;
|
||||
let values = values.into_object().context(ValueMustBeMapSnafu)?;
|
||||
|
||||
for (column_name, value) in values {
|
||||
if column_name == ts_column_name {
|
||||
if column_name.as_str() == ts_column_name {
|
||||
continue;
|
||||
}
|
||||
|
||||
resolve_value(value, column_name, &mut row, schema_info, pipeline_ctx)?;
|
||||
resolve_value(
|
||||
value,
|
||||
column_name.into(),
|
||||
&mut row,
|
||||
schema_info,
|
||||
pipeline_ctx,
|
||||
)?;
|
||||
}
|
||||
Ok(Row { values: row })
|
||||
}
|
||||
@@ -458,7 +425,7 @@ fn decide_semantic(p_ctx: &PipelineContext, column_name: &str) -> i32 {
|
||||
}
|
||||
|
||||
fn resolve_value(
|
||||
value: Value,
|
||||
value: VrlValue,
|
||||
column_name: String,
|
||||
row: &mut Vec<GreptimeValue>,
|
||||
schema_info: &mut SchemaInfo,
|
||||
@@ -484,27 +451,23 @@ fn resolve_value(
|
||||
};
|
||||
|
||||
match value {
|
||||
Value::Null => {}
|
||||
VrlValue::Null => {}
|
||||
|
||||
Value::Int8(_) | Value::Int16(_) | Value::Int32(_) | Value::Int64(_) => {
|
||||
VrlValue::Integer(v) => {
|
||||
// safe unwrap after type matched
|
||||
let v = value.as_i64().unwrap();
|
||||
resolve_simple_type(ValueData::I64Value(v), column_name, ColumnDataType::Int64)?;
|
||||
}
|
||||
|
||||
Value::Uint8(_) | Value::Uint16(_) | Value::Uint32(_) | Value::Uint64(_) => {
|
||||
VrlValue::Float(v) => {
|
||||
// safe unwrap after type matched
|
||||
let v = value.as_u64().unwrap();
|
||||
resolve_simple_type(ValueData::U64Value(v), column_name, ColumnDataType::Uint64)?;
|
||||
resolve_simple_type(
|
||||
ValueData::F64Value(v.into()),
|
||||
column_name,
|
||||
ColumnDataType::Float64,
|
||||
)?;
|
||||
}
|
||||
|
||||
Value::Float32(_) | Value::Float64(_) => {
|
||||
// safe unwrap after type matched
|
||||
let v = value.as_f64().unwrap();
|
||||
resolve_simple_type(ValueData::F64Value(v), column_name, ColumnDataType::Float64)?;
|
||||
}
|
||||
|
||||
Value::Boolean(v) => {
|
||||
VrlValue::Boolean(v) => {
|
||||
resolve_simple_type(
|
||||
ValueData::BoolValue(v),
|
||||
column_name,
|
||||
@@ -512,15 +475,30 @@ fn resolve_value(
|
||||
)?;
|
||||
}
|
||||
|
||||
Value::String(v) => {
|
||||
VrlValue::Bytes(v) => {
|
||||
resolve_simple_type(
|
||||
ValueData::StringValue(v),
|
||||
ValueData::StringValue(String::from_utf8_lossy_owned(v.to_vec())),
|
||||
column_name,
|
||||
ColumnDataType::String,
|
||||
)?;
|
||||
}
|
||||
|
||||
Value::Timestamp(Timestamp::Nanosecond(ns)) => {
|
||||
VrlValue::Regex(v) => {
|
||||
warn!(
|
||||
"Persisting regex value in the table, this should not happen, column_name: {}",
|
||||
column_name
|
||||
);
|
||||
resolve_simple_type(
|
||||
ValueData::StringValue(v.to_string()),
|
||||
column_name,
|
||||
ColumnDataType::String,
|
||||
)?;
|
||||
}
|
||||
|
||||
VrlValue::Timestamp(ts) => {
|
||||
let ns = ts.timestamp_nanos_opt().context(InvalidTimestampSnafu {
|
||||
input: ts.to_rfc3339(),
|
||||
})?;
|
||||
resolve_simple_type(
|
||||
ValueData::TimestampNanosecondValue(ns),
|
||||
column_name,
|
||||
@@ -528,32 +506,8 @@ fn resolve_value(
|
||||
)?;
|
||||
}
|
||||
|
||||
Value::Timestamp(Timestamp::Microsecond(us)) => {
|
||||
resolve_simple_type(
|
||||
ValueData::TimestampMicrosecondValue(us),
|
||||
column_name,
|
||||
ColumnDataType::TimestampMicrosecond,
|
||||
)?;
|
||||
}
|
||||
|
||||
Value::Timestamp(Timestamp::Millisecond(ms)) => {
|
||||
resolve_simple_type(
|
||||
ValueData::TimestampMillisecondValue(ms),
|
||||
column_name,
|
||||
ColumnDataType::TimestampMillisecond,
|
||||
)?;
|
||||
}
|
||||
|
||||
Value::Timestamp(Timestamp::Second(s)) => {
|
||||
resolve_simple_type(
|
||||
ValueData::TimestampSecondValue(s),
|
||||
column_name,
|
||||
ColumnDataType::TimestampSecond,
|
||||
)?;
|
||||
}
|
||||
|
||||
Value::Array(_) | Value::Map(_) => {
|
||||
let data: jsonb::Value = value.into();
|
||||
VrlValue::Array(_) | VrlValue::Object(_) => {
|
||||
let data = vrl_value_to_jsonb_value(&value);
|
||||
resolve_schema(
|
||||
index,
|
||||
ValueData::BinaryValue(data.to_vec()),
|
||||
@@ -574,8 +528,32 @@
     Ok(())
 }

+fn vrl_value_to_jsonb_value<'a>(value: &'a VrlValue) -> jsonb::Value<'a> {
+    match value {
+        VrlValue::Bytes(bytes) => jsonb::Value::String(String::from_utf8_lossy(bytes)),
+        VrlValue::Regex(value_regex) => jsonb::Value::String(Cow::Borrowed(value_regex.as_str())),
+        VrlValue::Integer(i) => jsonb::Value::Number(Number::Int64(*i)),
+        VrlValue::Float(not_nan) => jsonb::Value::Number(Number::Float64(not_nan.into_inner())),
+        VrlValue::Boolean(b) => jsonb::Value::Bool(*b),
+        VrlValue::Timestamp(date_time) => jsonb::Value::String(Cow::Owned(date_time.to_rfc3339())),
+        VrlValue::Object(btree_map) => jsonb::Value::Object(
+            btree_map
+                .iter()
+                .map(|(key, value)| (key.to_string(), vrl_value_to_jsonb_value(value)))
+                .collect(),
+        ),
+        VrlValue::Array(values) => jsonb::Value::Array(
+            values
+                .iter()
+                .map(|value| vrl_value_to_jsonb_value(value))
+                .collect(),
+        ),
+        VrlValue::Null => jsonb::Value::Null,
+    }
+}
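A short illustrative summary of the conversion above (sketch only; nested objects and arrays recurse):

// Sketch of the scalar mappings:
//   VrlValue::Integer(1)    -> jsonb Number(Int64(1))
//   VrlValue::Boolean(true) -> jsonb Bool(true)
//   VrlValue::Timestamp(t)  -> jsonb String(t.to_rfc3339())
fn sketch_maps_to_number(v: &VrlValue) -> bool {
    matches!(vrl_value_to_jsonb_value(v), jsonb::Value::Number(_))
}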
|
||||
|
||||
fn identity_pipeline_inner(
|
||||
pipeline_maps: Vec<Value>,
|
||||
pipeline_maps: Vec<VrlValue>,
|
||||
pipeline_ctx: &PipelineContext<'_>,
|
||||
) -> Result<(SchemaInfo, HashMap<ContextOpt, Vec<Row>>)> {
|
||||
let skip_error = pipeline_ctx.pipeline_param.skip_error();
|
||||
@@ -585,7 +563,7 @@ fn identity_pipeline_inner(
|
||||
// set time index column schema first
|
||||
schema_info.schema.push(ColumnSchema {
|
||||
column_name: custom_ts
|
||||
.map(|ts| ts.get_column_name().clone())
|
||||
.map(|ts| ts.get_column_name().to_string())
|
||||
.unwrap_or_else(|| DEFAULT_GREPTIME_TIMESTAMP_COLUMN.to_string()),
|
||||
datatype: custom_ts.map(|c| c.get_datatype()).unwrap_or_else(|| {
|
||||
if pipeline_ctx.channel == Channel::Prometheus {
|
||||
@@ -608,7 +586,7 @@ fn identity_pipeline_inner(
|
||||
skip_error
|
||||
);
|
||||
let row = unwrap_or_continue_if_err!(
|
||||
values_to_row(&mut schema_info, pipeline_map, pipeline_ctx, None),
|
||||
values_to_row(&mut schema_info, pipeline_map, pipeline_ctx, None, true),
|
||||
skip_error
|
||||
);
|
||||
|
||||
@@ -640,7 +618,7 @@ fn identity_pipeline_inner(
|
||||
/// 4. The pipeline will return an error if the same column is given mismatched data types
|
||||
/// 5. The pipeline will analyze the schema of each json record and merge them to get the final schema.
|
||||
pub fn identity_pipeline(
|
||||
array: Vec<Value>,
|
||||
array: Vec<VrlValue>,
|
||||
table: Option<Arc<table::Table>>,
|
||||
pipeline_ctx: &PipelineContext<'_>,
|
||||
) -> Result<HashMap<ContextOpt, Rows>> {
|
||||
@@ -688,22 +666,22 @@ pub fn identity_pipeline(
 ///
 /// The `max_nested_levels` parameter is used to limit the nested levels of the JSON object.
 /// The error will be returned if the nested levels is greater than the `max_nested_levels`.
-pub fn flatten_object(object: Value, max_nested_levels: usize) -> Result<Value> {
+pub fn flatten_object(object: VrlValue, max_nested_levels: usize) -> Result<VrlValue> {
     let mut flattened = BTreeMap::new();
-    let object = object.into_map().context(ValueMustBeMapSnafu)?;
+    let object = object.into_object().context(ValueMustBeMapSnafu)?;

     if !object.is_empty() {
         // it will use recursion to flatten the object.
         do_flatten_object(&mut flattened, None, object, 1, max_nested_levels)?;
     }

-    Ok(Value::Map(Map { values: flattened }))
+    Ok(VrlValue::Object(flattened))
 }

 fn do_flatten_object(
-    dest: &mut BTreeMap<String, Value>,
+    dest: &mut BTreeMap<KeyString, VrlValue>,
     base: Option<&str>,
-    object: BTreeMap<String, Value>,
+    object: BTreeMap<KeyString, VrlValue>,
     current_level: usize,
     max_nested_levels: usize,
 ) -> Result<()> {
@@ -713,14 +691,17 @@ fn do_flatten_object(
     }

     for (key, value) in object {
-        let new_key = base.map_or_else(|| key.clone(), |base_key| format!("{base_key}.{key}"));
+        let new_key = base.map_or_else(
+            || key.clone(),
+            |base_key| format!("{base_key}.{key}").into(),
+        );

         match value {
-            Value::Map(object) => {
+            VrlValue::Object(object) => {
                 do_flatten_object(
                     dest,
                     Some(&new_key),
-                    object.values,
+                    object,
                     current_level + 1,
                     max_nested_levels,
                 )?;
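An illustrative example of the flattening above (not part of the patch): nested keys are joined with a dot, up to max_nested_levels.

use std::collections::BTreeMap;
use vrl::value::{KeyString, Value as VrlValue};

// Sketch: { "a": { "b": 1 } } flattened with a sufficient max_nested_levels is
// expected to become { "a.b": 1 }.
fn sketch_flatten_one_level() -> bool {
    let mut inner = BTreeMap::new();
    inner.insert(KeyString::from("b"), VrlValue::Integer(1));
    let mut outer = BTreeMap::new();
    outer.insert(KeyString::from("a"), VrlValue::Object(inner));

    matches!(
        flatten_object(VrlValue::Object(outer), 10),
        Ok(VrlValue::Object(flat)) if flat.contains_key(&KeyString::from("a.b"))
    )
}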
@@ -740,7 +721,6 @@ mod tests {
|
||||
use api::v1::SemanticType;
|
||||
|
||||
use super::*;
|
||||
use crate::etl::{json_array_to_map, json_to_map};
|
||||
use crate::{identity_pipeline, PipelineDefinition};
|
||||
|
||||
#[test]
|
||||
@@ -752,7 +732,7 @@ mod tests {
|
||||
Channel::Unknown,
|
||||
);
|
||||
{
|
||||
let array = vec![
|
||||
let array = [
|
||||
serde_json::json!({
|
||||
"woshinull": null,
|
||||
"name": "Alice",
|
||||
@@ -772,7 +752,7 @@ mod tests {
|
||||
"gaga": "gaga"
|
||||
}),
|
||||
];
|
||||
let array = json_array_to_map(array).unwrap();
|
||||
let array = array.iter().map(|v| v.into()).collect();
|
||||
let rows = identity_pipeline(array, None, &pipeline_ctx);
|
||||
assert!(rows.is_err());
|
||||
assert_eq!(
|
||||
@@ -781,7 +761,7 @@ mod tests {
|
||||
);
|
||||
}
|
||||
{
|
||||
let array = vec![
|
||||
let array = [
|
||||
serde_json::json!({
|
||||
"woshinull": null,
|
||||
"name": "Alice",
|
||||
@@ -801,7 +781,8 @@ mod tests {
|
||||
"gaga": "gaga"
|
||||
}),
|
||||
];
|
||||
let rows = identity_pipeline(json_array_to_map(array).unwrap(), None, &pipeline_ctx);
|
||||
let array = array.iter().map(|v| v.into()).collect();
|
||||
let rows = identity_pipeline(array, None, &pipeline_ctx);
|
||||
assert!(rows.is_err());
|
||||
assert_eq!(
|
||||
rows.err().unwrap().to_string(),
|
||||
@@ -809,7 +790,7 @@ mod tests {
|
||||
);
|
||||
}
|
||||
{
|
||||
let array = vec![
|
||||
let array = [
|
||||
serde_json::json!({
|
||||
"woshinull": null,
|
||||
"name": "Alice",
|
||||
@@ -829,7 +810,8 @@ mod tests {
|
||||
"gaga": "gaga"
|
||||
}),
|
||||
];
|
||||
let rows = identity_pipeline(json_array_to_map(array).unwrap(), None, &pipeline_ctx);
|
||||
let array = array.iter().map(|v| v.into()).collect();
|
||||
let rows = identity_pipeline(array, None, &pipeline_ctx);
|
||||
assert!(rows.is_ok());
|
||||
let mut rows = rows.unwrap();
|
||||
assert!(rows.len() == 1);
|
||||
@@ -840,7 +822,7 @@ mod tests {
|
||||
assert_eq!(8, rows.rows[1].values.len());
|
||||
}
|
||||
{
|
||||
let array = vec![
|
||||
let array = [
|
||||
serde_json::json!({
|
||||
"woshinull": null,
|
||||
"name": "Alice",
|
||||
@@ -862,22 +844,23 @@ mod tests {
|
||||
];
|
||||
let tag_column_names = ["name".to_string(), "address".to_string()];
|
||||
|
||||
let rows = identity_pipeline_inner(json_array_to_map(array).unwrap(), &pipeline_ctx)
|
||||
.map(|(mut schema, mut rows)| {
|
||||
for name in tag_column_names {
|
||||
if let Some(index) = schema.index.get(&name) {
|
||||
schema.schema[*index].semantic_type = SemanticType::Tag as i32;
|
||||
let rows =
|
||||
identity_pipeline_inner(array.iter().map(|v| v.into()).collect(), &pipeline_ctx)
|
||||
.map(|(mut schema, mut rows)| {
|
||||
for name in tag_column_names {
|
||||
if let Some(index) = schema.index.get(&name) {
|
||||
schema.schema[*index].semantic_type = SemanticType::Tag as i32;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
assert!(rows.len() == 1);
|
||||
let rows = rows.remove(&ContextOpt::default()).unwrap();
|
||||
assert!(rows.len() == 1);
|
||||
let rows = rows.remove(&ContextOpt::default()).unwrap();
|
||||
|
||||
Rows {
|
||||
schema: schema.schema,
|
||||
rows,
|
||||
}
|
||||
});
|
||||
Rows {
|
||||
schema: schema.schema,
|
||||
rows,
|
||||
}
|
||||
});
|
||||
|
||||
assert!(rows.is_ok());
|
||||
let rows = rows.unwrap();
|
||||
@@ -974,8 +957,8 @@ mod tests {
|
||||
];
|
||||
|
||||
for (input, max_depth, expected) in test_cases {
|
||||
let input = json_to_map(input).unwrap();
|
||||
let expected = expected.map(|e| json_to_map(e).unwrap());
|
||||
let input = input.into();
|
||||
let expected = expected.map(|e| e.into());
|
||||
|
||||
let flattened_object = flatten_object(input, max_depth).ok();
|
||||
assert_eq!(flattened_object, expected);
|
||||
|
||||
@@ -18,58 +18,17 @@ use api::v1::{ColumnDataTypeExtension, ColumnOptions, JsonTypeExtension};
|
||||
use datatypes::schema::{FulltextOptions, SkippingIndexOptions};
|
||||
use greptime_proto::v1::value::ValueData;
|
||||
use greptime_proto::v1::{ColumnDataType, ColumnSchema, SemanticType};
|
||||
use snafu::ResultExt;
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
use vrl::value::Value as VrlValue;
|
||||
|
||||
use crate::error::{
|
||||
CoerceIncompatibleTypesSnafu, CoerceJsonTypeToSnafu, CoerceStringToTypeSnafu,
|
||||
CoerceTypeToJsonSnafu, CoerceUnsupportedEpochTypeSnafu, CoerceUnsupportedNullTypeSnafu,
|
||||
CoerceUnsupportedNullTypeToSnafu, ColumnOptionsSnafu, Error, Result,
|
||||
CoerceTypeToJsonSnafu, CoerceUnsupportedEpochTypeSnafu, ColumnOptionsSnafu,
|
||||
InvalidTimestampSnafu, Result, UnsupportedTypeInPipelineSnafu, VrlRegexValueSnafu,
|
||||
};
|
||||
use crate::etl::transform::index::Index;
|
||||
use crate::etl::transform::transformer::greptime::vrl_value_to_jsonb_value;
|
||||
use crate::etl::transform::{OnFailure, Transform};
|
||||
use crate::etl::value::{Timestamp, Value};
|
||||
|
||||
impl TryFrom<Value> for ValueData {
|
||||
type Error = Error;
|
||||
|
||||
fn try_from(value: Value) -> Result<Self> {
|
||||
match value {
|
||||
Value::Null => CoerceUnsupportedNullTypeSnafu.fail(),
|
||||
|
||||
Value::Int8(v) => Ok(ValueData::I32Value(v as i32)),
|
||||
Value::Int16(v) => Ok(ValueData::I32Value(v as i32)),
|
||||
Value::Int32(v) => Ok(ValueData::I32Value(v)),
|
||||
Value::Int64(v) => Ok(ValueData::I64Value(v)),
|
||||
|
||||
Value::Uint8(v) => Ok(ValueData::U32Value(v as u32)),
|
||||
Value::Uint16(v) => Ok(ValueData::U32Value(v as u32)),
|
||||
Value::Uint32(v) => Ok(ValueData::U32Value(v)),
|
||||
Value::Uint64(v) => Ok(ValueData::U64Value(v)),
|
||||
|
||||
Value::Float32(v) => Ok(ValueData::F32Value(v)),
|
||||
Value::Float64(v) => Ok(ValueData::F64Value(v)),
|
||||
|
||||
Value::Boolean(v) => Ok(ValueData::BoolValue(v)),
|
||||
Value::String(v) => Ok(ValueData::StringValue(v)),
|
||||
|
||||
Value::Timestamp(Timestamp::Nanosecond(ns)) => {
|
||||
Ok(ValueData::TimestampNanosecondValue(ns))
|
||||
}
|
||||
Value::Timestamp(Timestamp::Microsecond(us)) => {
|
||||
Ok(ValueData::TimestampMicrosecondValue(us))
|
||||
}
|
||||
Value::Timestamp(Timestamp::Millisecond(ms)) => {
|
||||
Ok(ValueData::TimestampMillisecondValue(ms))
|
||||
}
|
||||
Value::Timestamp(Timestamp::Second(s)) => Ok(ValueData::TimestampSecondValue(s)),
|
||||
|
||||
Value::Array(_) | Value::Map(_) => {
|
||||
let data: jsonb::Value = value.into();
|
||||
Ok(ValueData::BinaryValue(data.to_vec()))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn coerce_columns(transform: &Transform) -> Result<Vec<ColumnSchema>> {
|
||||
let mut columns = Vec::new();
|
||||
@@ -77,15 +36,21 @@ pub(crate) fn coerce_columns(transform: &Transform) -> Result<Vec<ColumnSchema>>
|
||||
for field in transform.fields.iter() {
|
||||
let column_name = field.target_or_input_field().to_string();
|
||||
|
||||
let (datatype, datatype_extension) = coerce_type(transform)?;
|
||||
let ext = if matches!(transform.type_, ColumnDataType::Binary) {
|
||||
Some(ColumnDataTypeExtension {
|
||||
type_ext: Some(TypeExt::JsonType(JsonTypeExtension::JsonBinary.into())),
|
||||
})
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let semantic_type = coerce_semantic_type(transform) as i32;
|
||||
|
||||
let column = ColumnSchema {
|
||||
column_name,
|
||||
datatype: datatype as i32,
|
||||
datatype: transform.type_ as i32,
|
||||
semantic_type,
|
||||
datatype_extension,
|
||||
datatype_extension: ext,
|
||||
options: coerce_options(transform)?,
|
||||
};
|
||||
columns.push(column);
|
||||
@@ -123,113 +88,60 @@ fn coerce_options(transform: &Transform) -> Result<Option<ColumnOptions>> {
|
||||
}
|
||||
}
|
||||
|
||||
fn coerce_type(transform: &Transform) -> Result<(ColumnDataType, Option<ColumnDataTypeExtension>)> {
|
||||
match transform.type_ {
|
||||
Value::Int8(_) => Ok((ColumnDataType::Int8, None)),
|
||||
Value::Int16(_) => Ok((ColumnDataType::Int16, None)),
|
||||
Value::Int32(_) => Ok((ColumnDataType::Int32, None)),
|
||||
Value::Int64(_) => Ok((ColumnDataType::Int64, None)),
|
||||
|
||||
Value::Uint8(_) => Ok((ColumnDataType::Uint8, None)),
|
||||
Value::Uint16(_) => Ok((ColumnDataType::Uint16, None)),
|
||||
Value::Uint32(_) => Ok((ColumnDataType::Uint32, None)),
|
||||
Value::Uint64(_) => Ok((ColumnDataType::Uint64, None)),
|
||||
|
||||
Value::Float32(_) => Ok((ColumnDataType::Float32, None)),
|
||||
Value::Float64(_) => Ok((ColumnDataType::Float64, None)),
|
||||
|
||||
Value::Boolean(_) => Ok((ColumnDataType::Boolean, None)),
|
||||
Value::String(_) => Ok((ColumnDataType::String, None)),
|
||||
|
||||
Value::Timestamp(Timestamp::Nanosecond(_)) => {
|
||||
Ok((ColumnDataType::TimestampNanosecond, None))
|
||||
}
|
||||
Value::Timestamp(Timestamp::Microsecond(_)) => {
|
||||
Ok((ColumnDataType::TimestampMicrosecond, None))
|
||||
}
|
||||
Value::Timestamp(Timestamp::Millisecond(_)) => {
|
||||
Ok((ColumnDataType::TimestampMillisecond, None))
|
||||
}
|
||||
Value::Timestamp(Timestamp::Second(_)) => Ok((ColumnDataType::TimestampSecond, None)),
|
||||
|
||||
Value::Array(_) | Value::Map(_) => Ok((
|
||||
ColumnDataType::Binary,
|
||||
Some(ColumnDataTypeExtension {
|
||||
type_ext: Some(TypeExt::JsonType(JsonTypeExtension::JsonBinary.into())),
|
||||
}),
|
||||
)),
|
||||
|
||||
Value::Null => CoerceUnsupportedNullTypeToSnafu {
|
||||
ty: transform.type_.to_str_type(),
|
||||
}
|
||||
.fail(),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn coerce_value(val: &Value, transform: &Transform) -> Result<Option<ValueData>> {
|
||||
pub(crate) fn coerce_value(val: &VrlValue, transform: &Transform) -> Result<Option<ValueData>> {
|
||||
match val {
|
||||
Value::Null => Ok(None),
|
||||
|
||||
Value::Int8(n) => coerce_i64_value(*n as i64, transform),
|
||||
Value::Int16(n) => coerce_i64_value(*n as i64, transform),
|
||||
Value::Int32(n) => coerce_i64_value(*n as i64, transform),
|
||||
Value::Int64(n) => coerce_i64_value(*n, transform),
|
||||
|
||||
Value::Uint8(n) => coerce_u64_value(*n as u64, transform),
|
||||
Value::Uint16(n) => coerce_u64_value(*n as u64, transform),
|
||||
Value::Uint32(n) => coerce_u64_value(*n as u64, transform),
|
||||
Value::Uint64(n) => coerce_u64_value(*n, transform),
|
||||
|
||||
Value::Float32(n) => coerce_f64_value(*n as f64, transform),
|
||||
Value::Float64(n) => coerce_f64_value(*n, transform),
|
||||
|
||||
Value::Boolean(b) => coerce_bool_value(*b, transform),
|
||||
Value::String(s) => coerce_string_value(s, transform),
|
||||
|
||||
Value::Timestamp(input_timestamp) => match &transform.type_ {
|
||||
Value::Timestamp(target_timestamp) => match target_timestamp {
|
||||
Timestamp::Nanosecond(_) => Ok(Some(ValueData::TimestampNanosecondValue(
|
||||
input_timestamp.timestamp_nanos(),
|
||||
))),
|
||||
Timestamp::Microsecond(_) => Ok(Some(ValueData::TimestampMicrosecondValue(
|
||||
input_timestamp.timestamp_micros(),
|
||||
))),
|
||||
Timestamp::Millisecond(_) => Ok(Some(ValueData::TimestampMillisecondValue(
|
||||
input_timestamp.timestamp_millis(),
|
||||
))),
|
||||
Timestamp::Second(_) => Ok(Some(ValueData::TimestampSecondValue(
|
||||
input_timestamp.timestamp(),
|
||||
))),
|
||||
},
|
||||
VrlValue::Null => Ok(None),
|
||||
VrlValue::Integer(n) => coerce_i64_value(*n, transform),
|
||||
VrlValue::Float(n) => coerce_f64_value(n.into_inner(), transform),
|
||||
VrlValue::Boolean(b) => coerce_bool_value(*b, transform),
|
||||
VrlValue::Bytes(b) => coerce_string_value(String::from_utf8_lossy(b).as_ref(), transform),
|
||||
VrlValue::Timestamp(ts) => match transform.type_ {
|
||||
ColumnDataType::TimestampNanosecond => Ok(Some(ValueData::TimestampNanosecondValue(
|
||||
ts.timestamp_nanos_opt().context(InvalidTimestampSnafu {
|
||||
input: ts.to_rfc3339(),
|
||||
})?,
|
||||
))),
|
||||
ColumnDataType::TimestampMicrosecond => Ok(Some(ValueData::TimestampMicrosecondValue(
|
||||
ts.timestamp_micros(),
|
||||
))),
|
||||
ColumnDataType::TimestampMillisecond => Ok(Some(ValueData::TimestampMillisecondValue(
|
||||
ts.timestamp_millis(),
|
||||
))),
|
||||
ColumnDataType::TimestampSecond => {
|
||||
Ok(Some(ValueData::TimestampSecondValue(ts.timestamp())))
|
||||
}
|
||||
_ => CoerceIncompatibleTypesSnafu {
|
||||
msg: "Timestamp can only be coerced to another type",
|
||||
}
|
||||
.fail(),
|
||||
},
|
||||
|
||||
Value::Array(_) | Value::Map(_) => coerce_json_value(val, transform),
|
||||
VrlValue::Array(_) | VrlValue::Object(_) => coerce_json_value(val, transform),
|
||||
VrlValue::Regex(_) => VrlRegexValueSnafu.fail(),
|
||||
}
|
||||
}
|
||||
|
||||
fn coerce_bool_value(b: bool, transform: &Transform) -> Result<Option<ValueData>> {
|
||||
let val = match transform.type_ {
|
||||
Value::Int8(_) => ValueData::I8Value(b as i32),
|
||||
Value::Int16(_) => ValueData::I16Value(b as i32),
|
||||
Value::Int32(_) => ValueData::I32Value(b as i32),
|
||||
Value::Int64(_) => ValueData::I64Value(b as i64),
|
||||
ColumnDataType::Int8 => ValueData::I8Value(b as i32),
|
||||
ColumnDataType::Int16 => ValueData::I16Value(b as i32),
|
||||
ColumnDataType::Int32 => ValueData::I32Value(b as i32),
|
||||
ColumnDataType::Int64 => ValueData::I64Value(b as i64),
|
||||
|
||||
Value::Uint8(_) => ValueData::U8Value(b as u32),
|
||||
Value::Uint16(_) => ValueData::U16Value(b as u32),
|
||||
Value::Uint32(_) => ValueData::U32Value(b as u32),
|
||||
Value::Uint64(_) => ValueData::U64Value(b as u64),
|
||||
ColumnDataType::Uint8 => ValueData::U8Value(b as u32),
|
||||
ColumnDataType::Uint16 => ValueData::U16Value(b as u32),
|
||||
ColumnDataType::Uint32 => ValueData::U32Value(b as u32),
|
||||
ColumnDataType::Uint64 => ValueData::U64Value(b as u64),
|
||||
|
||||
Value::Float32(_) => ValueData::F32Value(if b { 1.0 } else { 0.0 }),
|
||||
Value::Float64(_) => ValueData::F64Value(if b { 1.0 } else { 0.0 }),
|
||||
ColumnDataType::Float32 => ValueData::F32Value(if b { 1.0 } else { 0.0 }),
|
||||
ColumnDataType::Float64 => ValueData::F64Value(if b { 1.0 } else { 0.0 }),
|
||||
|
||||
Value::Boolean(_) => ValueData::BoolValue(b),
|
||||
Value::String(_) => ValueData::StringValue(b.to_string()),
|
||||
ColumnDataType::Boolean => ValueData::BoolValue(b),
|
||||
ColumnDataType::String => ValueData::StringValue(b.to_string()),
|
||||
|
||||
Value::Timestamp(_) => match transform.on_failure {
|
||||
ColumnDataType::TimestampNanosecond
|
||||
| ColumnDataType::TimestampMicrosecond
|
||||
| ColumnDataType::TimestampMillisecond
|
||||
| ColumnDataType::TimestampSecond => match transform.on_failure {
|
||||
Some(OnFailure::Ignore) => return Ok(None),
|
||||
Some(OnFailure::Default) => {
|
||||
return CoerceUnsupportedEpochTypeSnafu { ty: "Default" }.fail();
|
||||
@@ -239,14 +151,19 @@ fn coerce_bool_value(b: bool, transform: &Transform) -> Result<Option<ValueData>
|
||||
}
|
||||
},
|
||||
|
||||
Value::Array(_) | Value::Map(_) => {
|
||||
ColumnDataType::Binary => {
|
||||
return CoerceJsonTypeToSnafu {
|
||||
ty: transform.type_.to_str_type(),
|
||||
ty: transform.type_.as_str_name(),
|
||||
}
|
||||
.fail()
|
||||
}
|
||||
|
||||
Value::Null => return Ok(None),
|
||||
_ => {
|
||||
return UnsupportedTypeInPipelineSnafu {
|
||||
ty: transform.type_.as_str_name(),
|
||||
}
|
||||
.fail()
|
||||
}
|
||||
};
|
||||
|
||||
Ok(Some(val))
|
||||
@@ -254,37 +171,35 @@ fn coerce_bool_value(b: bool, transform: &Transform) -> Result<Option<ValueData>
|
||||
|
||||
fn coerce_i64_value(n: i64, transform: &Transform) -> Result<Option<ValueData>> {
|
||||
let val = match &transform.type_ {
|
||||
Value::Int8(_) => ValueData::I8Value(n as i32),
|
||||
Value::Int16(_) => ValueData::I16Value(n as i32),
|
||||
Value::Int32(_) => ValueData::I32Value(n as i32),
|
||||
Value::Int64(_) => ValueData::I64Value(n),
|
||||
ColumnDataType::Int8 => ValueData::I8Value(n as i32),
|
||||
ColumnDataType::Int16 => ValueData::I16Value(n as i32),
|
||||
ColumnDataType::Int32 => ValueData::I32Value(n as i32),
|
||||
ColumnDataType::Int64 => ValueData::I64Value(n),
|
||||
|
||||
Value::Uint8(_) => ValueData::U8Value(n as u32),
|
||||
Value::Uint16(_) => ValueData::U16Value(n as u32),
|
||||
Value::Uint32(_) => ValueData::U32Value(n as u32),
|
||||
Value::Uint64(_) => ValueData::U64Value(n as u64),
|
||||
ColumnDataType::Uint8 => ValueData::U8Value(n as u32),
|
||||
ColumnDataType::Uint16 => ValueData::U16Value(n as u32),
|
||||
ColumnDataType::Uint32 => ValueData::U32Value(n as u32),
|
||||
ColumnDataType::Uint64 => ValueData::U64Value(n as u64),
|
||||
|
||||
Value::Float32(_) => ValueData::F32Value(n as f32),
|
||||
Value::Float64(_) => ValueData::F64Value(n as f64),
|
||||
ColumnDataType::Float32 => ValueData::F32Value(n as f32),
|
||||
ColumnDataType::Float64 => ValueData::F64Value(n as f64),
|
||||
|
||||
Value::Boolean(_) => ValueData::BoolValue(n != 0),
|
||||
Value::String(_) => ValueData::StringValue(n.to_string()),
|
||||
ColumnDataType::Boolean => ValueData::BoolValue(n != 0),
|
||||
ColumnDataType::String => ValueData::StringValue(n.to_string()),
|
||||
|
||||
Value::Timestamp(unit) => match unit {
|
||||
Timestamp::Nanosecond(_) => ValueData::TimestampNanosecondValue(n),
|
||||
Timestamp::Microsecond(_) => ValueData::TimestampMicrosecondValue(n),
|
||||
Timestamp::Millisecond(_) => ValueData::TimestampMillisecondValue(n),
|
||||
Timestamp::Second(_) => ValueData::TimestampSecondValue(n),
|
||||
},
|
||||
ColumnDataType::TimestampNanosecond => ValueData::TimestampNanosecondValue(n),
|
||||
ColumnDataType::TimestampMicrosecond => ValueData::TimestampMicrosecondValue(n),
|
||||
ColumnDataType::TimestampMillisecond => ValueData::TimestampMillisecondValue(n),
|
||||
ColumnDataType::TimestampSecond => ValueData::TimestampSecondValue(n),
|
||||
|
||||
Value::Array(_) | Value::Map(_) => {
|
||||
ColumnDataType::Binary => {
|
||||
return CoerceJsonTypeToSnafu {
|
||||
ty: transform.type_.to_str_type(),
|
||||
ty: transform.type_.as_str_name(),
|
||||
}
|
||||
.fail()
|
||||
}
|
||||
|
||||
Value::Null => return Ok(None),
|
||||
_ => return Ok(None),
|
||||
};
|
||||
|
||||
Ok(Some(val))
|
||||
@@ -292,37 +207,35 @@ fn coerce_i64_value(n: i64, transform: &Transform) -> Result<Option<ValueData>>
|
||||
|
||||
fn coerce_u64_value(n: u64, transform: &Transform) -> Result<Option<ValueData>> {
|
||||
let val = match &transform.type_ {
|
||||
Value::Int8(_) => ValueData::I8Value(n as i32),
|
||||
Value::Int16(_) => ValueData::I16Value(n as i32),
|
||||
Value::Int32(_) => ValueData::I32Value(n as i32),
|
||||
Value::Int64(_) => ValueData::I64Value(n as i64),
|
||||
ColumnDataType::Int8 => ValueData::I8Value(n as i32),
|
||||
ColumnDataType::Int16 => ValueData::I16Value(n as i32),
|
||||
ColumnDataType::Int32 => ValueData::I32Value(n as i32),
|
||||
ColumnDataType::Int64 => ValueData::I64Value(n as i64),
|
||||
|
||||
Value::Uint8(_) => ValueData::U8Value(n as u32),
|
||||
Value::Uint16(_) => ValueData::U16Value(n as u32),
|
||||
Value::Uint32(_) => ValueData::U32Value(n as u32),
|
||||
Value::Uint64(_) => ValueData::U64Value(n),
|
||||
ColumnDataType::Uint8 => ValueData::U8Value(n as u32),
|
||||
ColumnDataType::Uint16 => ValueData::U16Value(n as u32),
|
||||
ColumnDataType::Uint32 => ValueData::U32Value(n as u32),
|
||||
ColumnDataType::Uint64 => ValueData::U64Value(n),
|
||||
|
||||
Value::Float32(_) => ValueData::F32Value(n as f32),
|
||||
Value::Float64(_) => ValueData::F64Value(n as f64),
|
||||
ColumnDataType::Float32 => ValueData::F32Value(n as f32),
|
||||
ColumnDataType::Float64 => ValueData::F64Value(n as f64),
|
||||
|
||||
Value::Boolean(_) => ValueData::BoolValue(n != 0),
|
||||
Value::String(_) => ValueData::StringValue(n.to_string()),
|
||||
ColumnDataType::Boolean => ValueData::BoolValue(n != 0),
|
||||
ColumnDataType::String => ValueData::StringValue(n.to_string()),
|
||||
|
||||
Value::Timestamp(unit) => match unit {
|
||||
Timestamp::Nanosecond(_) => ValueData::TimestampNanosecondValue(n as i64),
|
||||
Timestamp::Microsecond(_) => ValueData::TimestampMicrosecondValue(n as i64),
|
||||
Timestamp::Millisecond(_) => ValueData::TimestampMillisecondValue(n as i64),
|
||||
Timestamp::Second(_) => ValueData::TimestampSecondValue(n as i64),
|
||||
},
|
||||
ColumnDataType::TimestampNanosecond => ValueData::TimestampNanosecondValue(n as i64),
|
||||
ColumnDataType::TimestampMicrosecond => ValueData::TimestampMicrosecondValue(n as i64),
|
||||
ColumnDataType::TimestampMillisecond => ValueData::TimestampMillisecondValue(n as i64),
|
||||
ColumnDataType::TimestampSecond => ValueData::TimestampSecondValue(n as i64),
|
||||
|
||||
Value::Array(_) | Value::Map(_) => {
|
||||
ColumnDataType::Binary => {
|
||||
return CoerceJsonTypeToSnafu {
|
||||
ty: transform.type_.to_str_type(),
|
||||
ty: transform.type_.as_str_name(),
|
||||
}
|
||||
.fail()
|
||||
}
|
||||
|
||||
Value::Null => return Ok(None),
|
||||
_ => return Ok(None),
|
||||
};
|
||||
|
||||
Ok(Some(val))
|
||||
@@ -330,23 +243,26 @@ fn coerce_u64_value(n: u64, transform: &Transform) -> Result<Option<ValueData>>
|
||||
|
||||
fn coerce_f64_value(n: f64, transform: &Transform) -> Result<Option<ValueData>> {
|
||||
let val = match transform.type_ {
|
||||
Value::Int8(_) => ValueData::I8Value(n as i32),
|
||||
Value::Int16(_) => ValueData::I16Value(n as i32),
|
||||
Value::Int32(_) => ValueData::I32Value(n as i32),
|
||||
Value::Int64(_) => ValueData::I64Value(n as i64),
|
||||
ColumnDataType::Int8 => ValueData::I8Value(n as i32),
|
||||
ColumnDataType::Int16 => ValueData::I16Value(n as i32),
|
||||
ColumnDataType::Int32 => ValueData::I32Value(n as i32),
|
||||
ColumnDataType::Int64 => ValueData::I64Value(n as i64),
|
||||
|
||||
Value::Uint8(_) => ValueData::U8Value(n as u32),
|
||||
Value::Uint16(_) => ValueData::U16Value(n as u32),
|
||||
Value::Uint32(_) => ValueData::U32Value(n as u32),
|
||||
Value::Uint64(_) => ValueData::U64Value(n as u64),
|
||||
ColumnDataType::Uint8 => ValueData::U8Value(n as u32),
|
||||
ColumnDataType::Uint16 => ValueData::U16Value(n as u32),
|
||||
ColumnDataType::Uint32 => ValueData::U32Value(n as u32),
|
||||
ColumnDataType::Uint64 => ValueData::U64Value(n as u64),
|
||||
|
||||
Value::Float32(_) => ValueData::F32Value(n as f32),
|
||||
Value::Float64(_) => ValueData::F64Value(n),
|
||||
ColumnDataType::Float32 => ValueData::F32Value(n as f32),
|
||||
ColumnDataType::Float64 => ValueData::F64Value(n),
|
||||
|
||||
Value::Boolean(_) => ValueData::BoolValue(n != 0.0),
|
||||
Value::String(_) => ValueData::StringValue(n.to_string()),
|
||||
ColumnDataType::Boolean => ValueData::BoolValue(n != 0.0),
|
||||
ColumnDataType::String => ValueData::StringValue(n.to_string()),
|
||||
|
||||
Value::Timestamp(_) => match transform.on_failure {
|
||||
ColumnDataType::TimestampNanosecond
|
||||
| ColumnDataType::TimestampMicrosecond
|
||||
| ColumnDataType::TimestampMillisecond
|
||||
| ColumnDataType::TimestampSecond => match transform.on_failure {
|
||||
Some(OnFailure::Ignore) => return Ok(None),
|
||||
Some(OnFailure::Default) => {
|
||||
return CoerceUnsupportedEpochTypeSnafu { ty: "Default" }.fail();
|
||||
@@ -356,14 +272,14 @@ fn coerce_f64_value(n: f64, transform: &Transform) -> Result<Option<ValueData>>
|
||||
}
|
||||
},
|
||||
|
||||
Value::Array(_) | Value::Map(_) => {
|
||||
ColumnDataType::Binary => {
|
||||
return CoerceJsonTypeToSnafu {
|
||||
ty: transform.type_.to_str_type(),
|
||||
ty: transform.type_.as_str_name(),
|
||||
}
|
||||
.fail()
|
||||
}
|
||||
|
||||
Value::Null => return Ok(None),
|
||||
_ => return Ok(None),
|
||||
};
|
||||
|
||||
Ok(Some(val))
|
||||
@@ -376,12 +292,12 @@ macro_rules! coerce_string_value {
|
||||
Err(_) => match $transform.on_failure {
|
||||
Some(OnFailure::Ignore) => Ok(None),
|
||||
Some(OnFailure::Default) => match $transform.get_default() {
|
||||
Some(default) => coerce_value(default, $transform),
|
||||
None => coerce_value($transform.get_type_matched_default_val(), $transform),
|
||||
Some(default) => Ok(Some(default.clone())),
|
||||
None => $transform.get_type_matched_default_val().map(Some),
|
||||
},
|
||||
None => CoerceStringToTypeSnafu {
|
||||
s: $s,
|
||||
ty: $transform.type_.to_str_type(),
|
||||
ty: $transform.type_.as_str_name(),
|
||||
}
|
||||
.fail(),
|
||||
},
|
||||
@@ -389,92 +305,85 @@ macro_rules! coerce_string_value {
|
||||
};
|
||||
}
|
||||
|
||||
fn coerce_string_value(s: &String, transform: &Transform) -> Result<Option<ValueData>> {
|
||||
fn coerce_string_value(s: &str, transform: &Transform) -> Result<Option<ValueData>> {
|
||||
match transform.type_ {
|
||||
Value::Int8(_) => {
|
||||
ColumnDataType::Int8 => {
|
||||
coerce_string_value!(s, transform, i32, I8Value)
|
||||
}
|
||||
Value::Int16(_) => {
|
||||
ColumnDataType::Int16 => {
|
||||
coerce_string_value!(s, transform, i32, I16Value)
|
||||
}
|
||||
Value::Int32(_) => {
|
||||
ColumnDataType::Int32 => {
|
||||
coerce_string_value!(s, transform, i32, I32Value)
|
||||
}
|
||||
Value::Int64(_) => {
|
||||
ColumnDataType::Int64 => {
|
||||
coerce_string_value!(s, transform, i64, I64Value)
|
||||
}
|
||||
|
||||
Value::Uint8(_) => {
|
||||
ColumnDataType::Uint8 => {
|
||||
coerce_string_value!(s, transform, u32, U8Value)
|
||||
}
|
||||
Value::Uint16(_) => {
|
||||
ColumnDataType::Uint16 => {
|
||||
coerce_string_value!(s, transform, u32, U16Value)
|
||||
}
|
||||
Value::Uint32(_) => {
|
||||
ColumnDataType::Uint32 => {
|
||||
coerce_string_value!(s, transform, u32, U32Value)
|
||||
}
|
||||
Value::Uint64(_) => {
|
||||
ColumnDataType::Uint64 => {
|
||||
coerce_string_value!(s, transform, u64, U64Value)
|
||||
}
|
||||
|
||||
Value::Float32(_) => {
|
||||
ColumnDataType::Float32 => {
|
||||
coerce_string_value!(s, transform, f32, F32Value)
|
||||
}
|
||||
Value::Float64(_) => {
|
||||
ColumnDataType::Float64 => {
|
||||
coerce_string_value!(s, transform, f64, F64Value)
|
||||
}
|
||||
|
||||
Value::Boolean(_) => {
|
||||
ColumnDataType::Boolean => {
|
||||
coerce_string_value!(s, transform, bool, BoolValue)
|
||||
}
|
||||
|
||||
Value::String(_) => Ok(Some(ValueData::StringValue(s.to_string()))),
|
||||
ColumnDataType::String => Ok(Some(ValueData::StringValue(s.to_string()))),
|
||||
|
||||
Value::Timestamp(_) => match transform.on_failure {
|
||||
ColumnDataType::TimestampNanosecond
|
||||
| ColumnDataType::TimestampMicrosecond
|
||||
| ColumnDataType::TimestampMillisecond
|
||||
| ColumnDataType::TimestampSecond => match transform.on_failure {
|
||||
Some(OnFailure::Ignore) => Ok(None),
|
||||
Some(OnFailure::Default) => CoerceUnsupportedEpochTypeSnafu { ty: "Default" }.fail(),
|
||||
None => CoerceUnsupportedEpochTypeSnafu { ty: "String" }.fail(),
|
||||
},
|
||||
|
||||
Value::Array(_) | Value::Map(_) => CoerceStringToTypeSnafu {
|
||||
ColumnDataType::Binary => CoerceStringToTypeSnafu {
|
||||
s,
|
||||
ty: transform.type_.to_str_type(),
|
||||
ty: transform.type_.as_str_name(),
|
||||
}
|
||||
.fail(),
|
||||
|
||||
Value::Null => Ok(None),
|
||||
_ => Ok(None),
|
||||
}
|
||||
}
|
||||
|
||||
fn coerce_json_value(v: &Value, transform: &Transform) -> Result<Option<ValueData>> {
|
||||
fn coerce_json_value(v: &VrlValue, transform: &Transform) -> Result<Option<ValueData>> {
|
||||
match &transform.type_ {
|
||||
Value::Array(_) | Value::Map(_) => (),
|
||||
ColumnDataType::Binary => (),
|
||||
t => {
|
||||
return CoerceTypeToJsonSnafu {
|
||||
ty: t.to_str_type(),
|
||||
ty: t.as_str_name(),
|
||||
}
|
||||
.fail();
|
||||
}
|
||||
}
|
||||
match v {
|
||||
Value::Map(_) => {
|
||||
let data: jsonb::Value = v.into();
|
||||
Ok(Some(ValueData::BinaryValue(data.to_vec())))
|
||||
}
|
||||
Value::Array(_) => {
|
||||
let data: jsonb::Value = v.into();
|
||||
Ok(Some(ValueData::BinaryValue(data.to_vec())))
|
||||
}
|
||||
_ => CoerceTypeToJsonSnafu {
|
||||
ty: v.to_str_type(),
|
||||
}
|
||||
.fail(),
|
||||
}
|
||||
let data: jsonb::Value = vrl_value_to_jsonb_value(v);
|
||||
Ok(Some(ValueData::BinaryValue(data.to_vec())))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
|
||||
use vrl::prelude::Bytes;
|
||||
|
||||
use super::*;
|
||||
use crate::etl::field::Fields;
|
||||
|
||||
@@ -482,7 +391,7 @@ mod tests {
|
||||
fn test_coerce_string_without_on_failure() {
|
||||
let transform = Transform {
|
||||
fields: Fields::default(),
|
||||
type_: Value::Int32(0),
|
||||
type_: ColumnDataType::Int32,
|
||||
default: None,
|
||||
index: None,
|
||||
on_failure: None,
|
||||
@@ -491,14 +400,14 @@ mod tests {
|
||||
|
||||
// valid string
|
||||
{
|
||||
let val = Value::String("123".to_string());
|
||||
let val = VrlValue::Integer(123);
|
||||
let result = coerce_value(&val, &transform).unwrap();
|
||||
assert_eq!(result, Some(ValueData::I32Value(123)));
|
||||
}
|
||||
|
||||
// invalid string
|
||||
{
|
||||
let val = Value::String("hello".to_string());
|
||||
let val = VrlValue::Bytes(Bytes::from("hello"));
|
||||
let result = coerce_value(&val, &transform);
|
||||
assert!(result.is_err());
|
||||
}
|
||||
@@ -508,14 +417,14 @@ mod tests {
|
||||
fn test_coerce_string_with_on_failure_ignore() {
|
||||
let transform = Transform {
|
||||
fields: Fields::default(),
|
||||
type_: Value::Int32(0),
|
||||
type_: ColumnDataType::Int32,
|
||||
default: None,
|
||||
index: None,
|
||||
on_failure: Some(OnFailure::Ignore),
|
||||
tag: false,
|
||||
};
|
||||
|
||||
let val = Value::String("hello".to_string());
|
||||
let val = VrlValue::Bytes(Bytes::from("hello"));
|
||||
let result = coerce_value(&val, &transform).unwrap();
|
||||
assert_eq!(result, None);
|
||||
}
|
||||
@@ -524,7 +433,7 @@ mod tests {
|
||||
fn test_coerce_string_with_on_failure_default() {
|
||||
let mut transform = Transform {
|
||||
fields: Fields::default(),
|
||||
type_: Value::Int32(0),
|
||||
type_: ColumnDataType::Int32,
|
||||
default: None,
|
||||
index: None,
|
||||
on_failure: Some(OnFailure::Default),
|
||||
@@ -533,15 +442,15 @@ mod tests {
|
||||
|
||||
// with no explicit default value
|
||||
{
|
||||
let val = Value::String("hello".to_string());
|
||||
let val = VrlValue::Bytes(Bytes::from("hello"));
|
||||
let result = coerce_value(&val, &transform).unwrap();
|
||||
assert_eq!(result, Some(ValueData::I32Value(0)));
|
||||
}
|
||||
|
||||
// with explicit default value
|
||||
{
|
||||
transform.default = Some(Value::Int32(42));
|
||||
let val = Value::String("hello".to_string());
|
||||
transform.default = Some(ValueData::I32Value(42));
|
||||
let val = VrlValue::Bytes(Bytes::from("hello"));
|
||||
let result = coerce_value(&val, &transform).unwrap();
|
||||
assert_eq!(result, Some(ValueData::I32Value(42)));
|
||||
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,81 +0,0 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use crate::error::{Error, Result};
|
||||
use crate::etl::value::Value;
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Default)]
|
||||
pub struct Array {
|
||||
pub values: Vec<Value>,
|
||||
}
|
||||
|
||||
impl Array {
|
||||
pub fn new() -> Self {
|
||||
Array { values: vec![] }
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Display for Array {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
let values = self
|
||||
.values
|
||||
.iter()
|
||||
.map(|v| v.to_string())
|
||||
.collect::<Vec<String>>()
|
||||
.join(", ");
|
||||
write!(f, "[{}]", values)
|
||||
}
|
||||
}
|
||||
|
||||
impl std::ops::Deref for Array {
|
||||
type Target = Vec<Value>;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.values
|
||||
}
|
||||
}
|
||||
|
||||
impl std::ops::DerefMut for Array {
|
||||
fn deref_mut(&mut self) -> &mut Self::Target {
|
||||
&mut self.values
|
||||
}
|
||||
}
|
||||
|
||||
impl IntoIterator for Array {
|
||||
type Item = Value;
|
||||
|
||||
type IntoIter = std::vec::IntoIter<Value>;
|
||||
|
||||
fn into_iter(self) -> Self::IntoIter {
|
||||
self.values.into_iter()
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Vec<Value>> for Array {
|
||||
fn from(values: Vec<Value>) -> Self {
|
||||
Array { values }
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<Vec<serde_json::Value>> for Array {
|
||||
type Error = Error;
|
||||
|
||||
fn try_from(value: Vec<serde_json::Value>) -> Result<Self> {
|
||||
let values = value
|
||||
.into_iter()
|
||||
.map(|v| v.try_into())
|
||||
.collect::<Result<Vec<_>>>()?;
|
||||
Ok(Array { values })
|
||||
}
|
||||
}
|
||||
@@ -1,70 +0,0 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
use crate::etl::value::Value;
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Default)]
|
||||
pub struct Map {
|
||||
pub values: BTreeMap<String, Value>,
|
||||
}
|
||||
|
||||
impl Map {
|
||||
pub fn one(key: impl Into<String>, value: Value) -> Map {
|
||||
let mut map = Map::default();
|
||||
map.insert(key, value);
|
||||
map
|
||||
}
|
||||
|
||||
pub fn insert(&mut self, key: impl Into<String>, value: Value) {
|
||||
self.values.insert(key.into(), value);
|
||||
}
|
||||
|
||||
pub fn extend(&mut self, Map { values }: Map) {
|
||||
self.values.extend(values);
|
||||
}
|
||||
}
|
||||
|
||||
impl From<BTreeMap<String, Value>> for Map {
|
||||
fn from(values: BTreeMap<String, Value>) -> Self {
|
||||
Self { values }
|
||||
}
|
||||
}
|
||||
|
||||
impl std::ops::Deref for Map {
|
||||
type Target = BTreeMap<String, Value>;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.values
|
||||
}
|
||||
}
|
||||
|
||||
impl std::ops::DerefMut for Map {
|
||||
fn deref_mut(&mut self) -> &mut Self::Target {
|
||||
&mut self.values
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Display for Map {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
let values = self
|
||||
.values
|
||||
.iter()
|
||||
.map(|(k, v)| format!("{}: {}", k, v))
|
||||
.collect::<Vec<String>>()
|
||||
.join(", ");
|
||||
write!(f, "{{{}}}", values)
|
||||
}
|
||||
}
|
||||
@@ -1,140 +0,0 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use chrono::{DateTime, Utc};
|
||||
use common_time::timestamp::TimeUnit;
|
||||
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub enum Timestamp {
|
||||
Nanosecond(i64),
|
||||
Microsecond(i64),
|
||||
Millisecond(i64),
|
||||
Second(i64),
|
||||
}
|
||||
|
||||
pub(crate) const NANOSECOND_RESOLUTION: &str = "nanosecond";
|
||||
pub(crate) const NANO_RESOLUTION: &str = "nano";
|
||||
pub(crate) const NS_RESOLUTION: &str = "ns";
|
||||
pub(crate) const MICROSECOND_RESOLUTION: &str = "microsecond";
|
||||
pub(crate) const MICRO_RESOLUTION: &str = "micro";
|
||||
pub(crate) const US_RESOLUTION: &str = "us";
|
||||
pub(crate) const MILLISECOND_RESOLUTION: &str = "millisecond";
|
||||
pub(crate) const MILLI_RESOLUTION: &str = "milli";
|
||||
pub(crate) const MS_RESOLUTION: &str = "ms";
|
||||
pub(crate) const SECOND_RESOLUTION: &str = "second";
|
||||
pub(crate) const SEC_RESOLUTION: &str = "sec";
|
||||
pub(crate) const S_RESOLUTION: &str = "s";
|
||||
|
||||
pub(crate) const VALID_RESOLUTIONS: [&str; 12] = [
|
||||
NANOSECOND_RESOLUTION,
|
||||
NANO_RESOLUTION,
|
||||
NS_RESOLUTION,
|
||||
MICROSECOND_RESOLUTION,
|
||||
MICRO_RESOLUTION,
|
||||
US_RESOLUTION,
|
||||
MILLISECOND_RESOLUTION,
|
||||
MILLI_RESOLUTION,
|
||||
MS_RESOLUTION,
|
||||
SECOND_RESOLUTION,
|
||||
SEC_RESOLUTION,
|
||||
S_RESOLUTION,
|
||||
];
|
||||
|
||||
impl Timestamp {
|
||||
pub(crate) fn timestamp_nanos(&self) -> i64 {
|
||||
match self {
|
||||
Timestamp::Nanosecond(v) => *v,
|
||||
Timestamp::Microsecond(v) => *v * 1_000,
|
||||
Timestamp::Millisecond(v) => *v * 1_000_000,
|
||||
Timestamp::Second(v) => *v * 1_000_000_000,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn timestamp_micros(&self) -> i64 {
|
||||
match self {
|
||||
Timestamp::Nanosecond(v) => *v / 1_000,
|
||||
Timestamp::Microsecond(v) => *v,
|
||||
Timestamp::Millisecond(v) => *v * 1_000,
|
||||
Timestamp::Second(v) => *v * 1_000_000,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn timestamp_millis(&self) -> i64 {
|
||||
match self {
|
||||
Timestamp::Nanosecond(v) => *v / 1_000_000,
|
||||
Timestamp::Microsecond(v) => *v / 1_000,
|
||||
Timestamp::Millisecond(v) => *v,
|
||||
Timestamp::Second(v) => *v * 1_000,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn timestamp(&self) -> i64 {
|
||||
match self {
|
||||
Timestamp::Nanosecond(v) => *v / 1_000_000_000,
|
||||
Timestamp::Microsecond(v) => *v / 1_000_000,
|
||||
Timestamp::Millisecond(v) => *v / 1_000,
|
||||
Timestamp::Second(v) => *v,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn to_unit(&self, unit: &TimeUnit) -> i64 {
|
||||
match unit {
|
||||
TimeUnit::Second => self.timestamp(),
|
||||
TimeUnit::Millisecond => self.timestamp_millis(),
|
||||
TimeUnit::Microsecond => self.timestamp_micros(),
|
||||
TimeUnit::Nanosecond => self.timestamp_nanos(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get_unit(&self) -> TimeUnit {
|
||||
match self {
|
||||
Timestamp::Nanosecond(_) => TimeUnit::Nanosecond,
|
||||
Timestamp::Microsecond(_) => TimeUnit::Microsecond,
|
||||
Timestamp::Millisecond(_) => TimeUnit::Millisecond,
|
||||
Timestamp::Second(_) => TimeUnit::Second,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn to_datetime(&self) -> Option<DateTime<Utc>> {
|
||||
match self {
|
||||
Timestamp::Nanosecond(v) => Some(DateTime::from_timestamp_nanos(*v)),
|
||||
Timestamp::Microsecond(v) => DateTime::from_timestamp_micros(*v),
|
||||
Timestamp::Millisecond(v) => DateTime::from_timestamp_millis(*v),
|
||||
Timestamp::Second(v) => DateTime::from_timestamp(*v, 0),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn from_datetime(dt: DateTime<Utc>) -> Option<Self> {
|
||||
dt.timestamp_nanos_opt().map(Timestamp::Nanosecond)
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for Timestamp {
|
||||
fn default() -> Self {
|
||||
Timestamp::Nanosecond(chrono::Utc::now().timestamp_nanos_opt().unwrap_or_default())
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Display for Timestamp {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
let (value, resolution) = match self {
|
||||
Timestamp::Nanosecond(v) => (v, NANOSECOND_RESOLUTION),
|
||||
Timestamp::Microsecond(v) => (v, MICROSECOND_RESOLUTION),
|
||||
Timestamp::Millisecond(v) => (v, MILLISECOND_RESOLUTION),
|
||||
Timestamp::Second(v) => (v, SECOND_RESOLUTION),
|
||||
};
|
||||
|
||||
write!(f, "{}, resolution: {}", value, resolution)
|
||||
}
|
||||
}
|
||||
@@ -12,6 +12,8 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#![feature(string_from_utf8_lossy_owned)]
|
||||
|
||||
mod dispatcher;
|
||||
pub mod error;
|
||||
mod etl;
|
||||
@@ -24,10 +26,8 @@ pub use etl::processor::Processor;
|
||||
pub use etl::transform::transformer::greptime::{GreptimePipelineParams, SchemaInfo};
|
||||
pub use etl::transform::transformer::identity_pipeline;
|
||||
pub use etl::transform::GreptimeTransformer;
|
||||
pub use etl::value::{Array, Map, Timestamp, Value};
|
||||
pub use etl::{
|
||||
json_array_to_map, json_to_map, parse, simd_json_array_to_map, simd_json_to_map, Content,
|
||||
DispatchedTo, Pipeline, PipelineExecOutput, TransformedOutput, TransformerMode,
|
||||
parse, Content, DispatchedTo, Pipeline, PipelineExecOutput, TransformedOutput, TransformerMode,
|
||||
};
|
||||
pub use manager::{
|
||||
pipeline_operator, table, util, IdentityTimeIndex, PipelineContext, PipelineDefinition,
|
||||
|
||||
@@ -16,18 +16,22 @@ use std::sync::Arc;
|
||||
|
||||
use api::v1::value::ValueData;
|
||||
use api::v1::ColumnDataType;
|
||||
use chrono::{DateTime, Utc};
|
||||
use common_time::timestamp::TimeUnit;
|
||||
use common_time::Timestamp;
|
||||
use datatypes::timestamp::TimestampNanosecond;
|
||||
use itertools::Itertools;
|
||||
use session::context::Channel;
|
||||
use snafu::ensure;
|
||||
use snafu::{ensure, OptionExt};
|
||||
use util::to_pipeline_version;
|
||||
use vrl::value::Value as VrlValue;
|
||||
|
||||
use crate::error::{CastTypeSnafu, InvalidCustomTimeIndexSnafu, PipelineMissingSnafu, Result};
|
||||
use crate::etl::value::time::{MS_RESOLUTION, NS_RESOLUTION, S_RESOLUTION, US_RESOLUTION};
|
||||
use crate::error::{
|
||||
CastTypeSnafu, InvalidCustomTimeIndexSnafu, InvalidTimestampSnafu, PipelineMissingSnafu, Result,
|
||||
};
|
||||
use crate::etl::value::{MS_RESOLUTION, NS_RESOLUTION, S_RESOLUTION, US_RESOLUTION};
|
||||
use crate::table::PipelineTable;
|
||||
use crate::{GreptimePipelineParams, Pipeline, Value};
|
||||
use crate::{GreptimePipelineParams, Pipeline};
|
||||
|
||||
mod pipeline_cache;
|
||||
pub mod pipeline_operator;
|
||||
@@ -232,7 +236,7 @@ impl IdentityTimeIndex {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get_column_name(&self) -> &String {
|
||||
pub fn get_column_name(&self) -> &str {
|
||||
match self {
|
||||
IdentityTimeIndex::Epoch(field, _, _) => field,
|
||||
IdentityTimeIndex::DateStr(field, _, _) => field,
|
||||
@@ -258,25 +262,25 @@ impl IdentityTimeIndex {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get_timestamp(&self, value: Option<&Value>) -> Result<ValueData> {
|
||||
pub fn get_timestamp_value(&self, value: Option<&VrlValue>) -> Result<ValueData> {
|
||||
match self {
|
||||
IdentityTimeIndex::Epoch(_, unit, ignore_errors) => {
|
||||
let v = match value {
|
||||
Some(Value::Int32(v)) => *v as i64,
|
||||
Some(Value::Int64(v)) => *v,
|
||||
Some(Value::Uint32(v)) => *v as i64,
|
||||
Some(Value::Uint64(v)) => *v as i64,
|
||||
Some(Value::String(s)) => match s.parse::<i64>() {
|
||||
Some(VrlValue::Integer(v)) => *v,
|
||||
Some(VrlValue::Bytes(s)) => match String::from_utf8_lossy(s).parse::<i64>() {
|
||||
Ok(v) => v,
|
||||
Err(_) => {
|
||||
return if_ignore_errors(
|
||||
*ignore_errors,
|
||||
*unit,
|
||||
format!("failed to convert {} to number", s),
|
||||
format!(
|
||||
"failed to convert {} to number",
|
||||
String::from_utf8_lossy(s)
|
||||
),
|
||||
)
|
||||
}
|
||||
},
|
||||
Some(Value::Timestamp(timestamp)) => timestamp.to_unit(unit),
|
||||
Some(VrlValue::Timestamp(timestamp)) => datetime_utc_to_unit(timestamp, unit)?,
|
||||
Some(v) => {
|
||||
return if_ignore_errors(
|
||||
*ignore_errors,
|
||||
@@ -292,7 +296,7 @@ impl IdentityTimeIndex {
|
||||
}
|
||||
IdentityTimeIndex::DateStr(_, format, ignore_errors) => {
|
||||
let v = match value {
|
||||
Some(Value::String(s)) => s,
|
||||
Some(VrlValue::Bytes(s)) => String::from_utf8_lossy(s),
|
||||
Some(v) => {
|
||||
return if_ignore_errors(
|
||||
*ignore_errors,
|
||||
@@ -309,7 +313,7 @@ impl IdentityTimeIndex {
|
||||
}
|
||||
};
|
||||
|
||||
let timestamp = match chrono::DateTime::parse_from_str(v, format) {
|
||||
let timestamp = match chrono::DateTime::parse_from_str(&v, format) {
|
||||
Ok(ts) => ts,
|
||||
Err(_) => {
|
||||
return if_ignore_errors(
|
||||
@@ -321,13 +325,31 @@ impl IdentityTimeIndex {
|
||||
};
|
||||
|
||||
Ok(ValueData::TimestampNanosecondValue(
|
||||
timestamp.timestamp_nanos_opt().unwrap_or_default(),
|
||||
timestamp
|
||||
.timestamp_nanos_opt()
|
||||
.context(InvalidTimestampSnafu {
|
||||
input: timestamp.to_rfc3339(),
|
||||
})?,
|
||||
))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn datetime_utc_to_unit(timestamp: &DateTime<Utc>, unit: &TimeUnit) -> Result<i64> {
|
||||
let ts = match unit {
|
||||
TimeUnit::Nanosecond => timestamp
|
||||
.timestamp_nanos_opt()
|
||||
.context(InvalidTimestampSnafu {
|
||||
input: timestamp.to_rfc3339(),
|
||||
})?,
|
||||
TimeUnit::Microsecond => timestamp.timestamp_micros(),
|
||||
TimeUnit::Millisecond => timestamp.timestamp_millis(),
|
||||
TimeUnit::Second => timestamp.timestamp(),
|
||||
};
|
||||
Ok(ts)
|
||||
}
|
||||
|
||||
fn if_ignore_errors(ignore_errors: bool, unit: TimeUnit, msg: String) -> Result<ValueData> {
|
||||
if ignore_errors {
|
||||
Ok(time_unit_to_value_data(
|
||||
|
||||
@@ -15,12 +15,12 @@
|
||||
use dyn_fmt::AsStrFormatExt;
|
||||
use regex::Regex;
|
||||
use snafu::{ensure, OptionExt};
|
||||
use vrl::value::Value as VrlValue;
|
||||
use yaml_rust::Yaml;
|
||||
|
||||
use crate::error::{
|
||||
Error, InvalidTableSuffixTemplateSnafu, RequiredTableSuffixTemplateSnafu, Result,
|
||||
};
|
||||
use crate::Value;
|
||||
|
||||
const REPLACE_KEY: &str = "{}";
|
||||
|
||||
@@ -47,22 +47,16 @@ pub(crate) struct TableSuffixTemplate {
|
||||
}
|
||||
|
||||
impl TableSuffixTemplate {
|
||||
pub fn apply(&self, val: &Value) -> Option<String> {
|
||||
pub fn apply(&self, val: &VrlValue) -> Option<String> {
|
||||
let val = val.as_object()?;
|
||||
let values = self
|
||||
.keys
|
||||
.iter()
|
||||
.filter_map(|key| {
|
||||
let v = val.get(key)?;
|
||||
let v = val.get(key.as_str())?;
|
||||
match v {
|
||||
Value::Int8(v) => Some(v.to_string()),
|
||||
Value::Int16(v) => Some(v.to_string()),
|
||||
Value::Int32(v) => Some(v.to_string()),
|
||||
Value::Int64(v) => Some(v.to_string()),
|
||||
Value::Uint8(v) => Some(v.to_string()),
|
||||
Value::Uint16(v) => Some(v.to_string()),
|
||||
Value::Uint32(v) => Some(v.to_string()),
|
||||
Value::Uint64(v) => Some(v.to_string()),
|
||||
Value::String(v) => Some(v.clone()),
|
||||
VrlValue::Integer(v) => Some(v.to_string()),
|
||||
VrlValue::Bytes(v) => Some(String::from_utf8_lossy_owned(v.to_vec())),
|
||||
_ => None,
|
||||
}
|
||||
})
|
||||
|
||||
@@ -13,11 +13,12 @@
|
||||
// limitations under the License.
|
||||
|
||||
use greptime_proto::v1::{ColumnDataType, ColumnSchema, Rows, SemanticType};
|
||||
use pipeline::{json_to_map, parse, setup_pipeline, Content, Pipeline, PipelineContext};
|
||||
use pipeline::{parse, setup_pipeline, Content, Pipeline, PipelineContext};
|
||||
use vrl::value::Value as VrlValue;
|
||||
|
||||
/// test util function to parse and execute pipeline
|
||||
pub fn parse_and_exec(input_str: &str, pipeline_yaml: &str) -> Rows {
|
||||
let input_value = serde_json::from_str::<serde_json::Value>(input_str).unwrap();
|
||||
let input_value = serde_json::from_str::<VrlValue>(input_str).unwrap();
|
||||
|
||||
let yaml_content = Content::Yaml(pipeline_yaml);
|
||||
let pipeline: Pipeline = parse(&yaml_content).expect("failed to parse pipeline");
|
||||
@@ -32,21 +33,19 @@ pub fn parse_and_exec(input_str: &str, pipeline_yaml: &str) -> Rows {
|
||||
let mut rows = Vec::new();
|
||||
|
||||
match input_value {
|
||||
serde_json::Value::Array(array) => {
|
||||
VrlValue::Array(array) => {
|
||||
for value in array {
|
||||
let intermediate_status = json_to_map(value).unwrap();
|
||||
let row = pipeline
|
||||
.exec_mut(intermediate_status, &pipeline_ctx, &mut schema_info)
|
||||
.exec_mut(value, &pipeline_ctx, &mut schema_info)
|
||||
.expect("failed to exec pipeline")
|
||||
.into_transformed()
|
||||
.expect("expect transformed result ");
|
||||
rows.push(row.0);
|
||||
}
|
||||
}
|
||||
serde_json::Value::Object(_) => {
|
||||
let intermediate_status = json_to_map(input_value).unwrap();
|
||||
VrlValue::Object(_) => {
|
||||
let row = pipeline
|
||||
.exec_mut(intermediate_status, &pipeline_ctx, &mut schema_info)
|
||||
.exec_mut(input_value, &pipeline_ctx, &mut schema_info)
|
||||
.expect("failed to exec pipeline")
|
||||
.into_transformed()
|
||||
.expect("expect transformed result ");
|
||||
|
||||
@@ -16,7 +16,7 @@ mod common;
|
||||
|
||||
use greptime_proto::v1::value::ValueData::StringValue;
|
||||
use greptime_proto::v1::{ColumnDataType, SemanticType};
|
||||
use pipeline::{json_to_map, setup_pipeline, PipelineContext};
|
||||
use pipeline::{setup_pipeline, PipelineContext};
|
||||
|
||||
fn make_string_column_schema(name: String) -> greptime_proto::v1::ColumnSchema {
|
||||
common::make_column_schema(name, ColumnDataType::String, SemanticType::Field)
|
||||
@@ -282,7 +282,7 @@ transform:
|
||||
session::context::Channel::Unknown,
|
||||
);
|
||||
|
||||
let result = json_to_map(input_value).unwrap();
|
||||
let result = input_value.into();
|
||||
|
||||
let row = pipeline.exec_mut(result, &pipeline_ctx, &mut schema_info);
|
||||
|
||||
|
||||
@@ -20,7 +20,7 @@ use greptime_proto::v1::value::ValueData::{
|
||||
U32Value, U64Value, U8Value,
|
||||
};
|
||||
use greptime_proto::v1::Value as GreptimeValue;
|
||||
use pipeline::{json_to_map, parse, setup_pipeline, Content, Pipeline, PipelineContext};
|
||||
use pipeline::{parse, setup_pipeline, Content, Pipeline, PipelineContext};
|
||||
|
||||
#[test]
|
||||
fn test_complex_data() {
|
||||
@@ -425,7 +425,7 @@ transform:
|
||||
&pipeline_param,
|
||||
session::context::Channel::Unknown,
|
||||
);
|
||||
let stats = json_to_map(input_value).unwrap();
|
||||
let stats = input_value.into();
|
||||
|
||||
let row = pipeline
|
||||
.exec_mut(stats, &pipeline_ctx, &mut schema_info)
|
||||
@@ -500,7 +500,7 @@ transform:
|
||||
session::context::Channel::Unknown,
|
||||
);
|
||||
|
||||
let status = json_to_map(input_value).unwrap();
|
||||
let status = input_value.into();
|
||||
let row = pipeline
|
||||
.exec_mut(status, &pipeline_ctx, &mut schema_info)
|
||||
.unwrap()
|
||||
@@ -615,7 +615,7 @@ transform:
|
||||
session::context::Channel::Unknown,
|
||||
);
|
||||
|
||||
let status = json_to_map(input_value).unwrap();
|
||||
let status = input_value.into();
|
||||
let row = pipeline
|
||||
.exec_mut(status, &pipeline_ctx, &mut schema_info)
|
||||
.unwrap()
|
||||
@@ -687,7 +687,7 @@ transform:
|
||||
session::context::Channel::Unknown,
|
||||
);
|
||||
|
||||
let status = json_to_map(input_value).unwrap();
|
||||
let status = input_value.into();
|
||||
let row = pipeline
|
||||
.exec_mut(status, &pipeline_ctx, &mut schema_info)
|
||||
.unwrap()
|
||||
@@ -733,7 +733,7 @@ transform:
|
||||
session::context::Channel::Unknown,
|
||||
);
|
||||
|
||||
let status = json_to_map(input_value).unwrap();
|
||||
let status = input_value.into();
|
||||
let row = pipeline
|
||||
.exec_mut(status, &pipeline_ctx, &mut schema_info)
|
||||
.unwrap()
|
||||
@@ -798,7 +798,7 @@ transform:
|
||||
session::context::Channel::Unknown,
|
||||
);
|
||||
|
||||
let status = json_to_map(input_value).unwrap();
|
||||
let status = input_value.into();
|
||||
let row = pipeline
|
||||
.exec_mut(status, &pipeline_ctx, &mut schema_info)
|
||||
.unwrap()
|
||||
@@ -845,7 +845,7 @@ transform:
|
||||
session::context::Channel::Unknown,
|
||||
);
|
||||
|
||||
let status = json_to_map(input_value).unwrap();
|
||||
let status = input_value.into();
|
||||
let row = pipeline
|
||||
.exec_mut(status, &pipeline_ctx, &mut schema_info)
|
||||
.unwrap()
|
||||
@@ -913,7 +913,7 @@ transform:
|
||||
session::context::Channel::Unknown,
|
||||
);
|
||||
|
||||
let status = json_to_map(input_value1).unwrap();
|
||||
let status = input_value1.into();
|
||||
let dispatched_to = pipeline
|
||||
.exec_mut(status, &pipeline_ctx, &mut schema_info)
|
||||
.unwrap()
|
||||
@@ -922,7 +922,7 @@ transform:
|
||||
assert_eq!(dispatched_to.table_suffix, "http");
|
||||
assert_eq!(dispatched_to.pipeline.unwrap(), "access_log_pipeline");
|
||||
|
||||
let status = json_to_map(input_value2).unwrap();
|
||||
let status = input_value2.into();
|
||||
let row = pipeline
|
||||
.exec_mut(status, &pipeline_ctx, &mut schema_info)
|
||||
.unwrap()
|
||||
@@ -983,7 +983,7 @@ table_suffix: _${logger}
|
||||
session::context::Channel::Unknown,
|
||||
);
|
||||
|
||||
let status = json_to_map(input_value).unwrap();
|
||||
let status = input_value.into();
|
||||
let exec_re = pipeline
|
||||
.exec_mut(status, &pipeline_ctx, &mut schema_info)
|
||||
.unwrap();
|
||||
|
||||
@@ -340,7 +340,14 @@ impl ExecutionPlan for RangeManipulateExec {
|
||||
}
|
||||
|
||||
fn required_input_distribution(&self) -> Vec<Distribution> {
|
||||
self.input.required_input_distribution()
|
||||
let input_requirement = self.input.required_input_distribution();
|
||||
if input_requirement.is_empty() {
|
||||
// if the input is EmptyMetric, its required_input_distribution() is empty so we can't
|
||||
// use its input distribution.
|
||||
vec![Distribution::UnspecifiedDistribution]
|
||||
} else {
|
||||
input_requirement
|
||||
}
|
||||
}
|
||||
|
||||
fn with_new_children(
|
||||
|
||||
@@ -71,6 +71,7 @@ store-api.workspace = true
|
||||
substrait.workspace = true
|
||||
table.workspace = true
|
||||
tokio.workspace = true
|
||||
tracing.workspace = true
|
||||
unescaper = "0.1"
|
||||
uuid.workspace = true
|
||||
|
||||
|
||||
@@ -155,7 +155,23 @@ struct PlanRewriter {
|
||||
/// Partition columns of the table in current pass
|
||||
partition_cols: Option<Vec<String>>,
|
||||
column_requirements: HashSet<Column>,
|
||||
/// Whether to expand on next call
|
||||
/// This is used to handle the case where a plan is transformed, but need to be expanded from it's
|
||||
/// parent node. For example a Aggregate plan is split into two parts in frontend and datanode, and need
|
||||
/// to be expanded from the parent node of the Aggregate plan.
|
||||
expand_on_next_call: bool,
|
||||
/// Expanding on next partial/conditional/transformed commutative plan
|
||||
/// This is used to handle the case where a plan is transformed, but still
|
||||
/// need to push down as many node as possible before next partial/conditional/transformed commutative
|
||||
/// plan. I.e.
|
||||
/// ```
|
||||
/// Limit:
|
||||
/// Sort:
|
||||
/// ```
|
||||
/// where `Limit` is partial commutative, and `Sort` is conditional commutative.
|
||||
/// In this case, we need to expand the `Limit` plan,
|
||||
/// so that we can push down the `Sort` plan as much as possible.
|
||||
expand_on_next_part_cond_trans_commutative: bool,
|
||||
new_child_plan: Option<LogicalPlan>,
|
||||
}
|
||||
|
||||
@@ -177,15 +193,38 @@ impl PlanRewriter {
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
if self.expand_on_next_call {
|
||||
self.expand_on_next_call = false;
|
||||
return true;
|
||||
}
|
||||
|
||||
if self.expand_on_next_part_cond_trans_commutative {
|
||||
let comm = Categorizer::check_plan(plan, self.partition_cols.clone());
|
||||
match comm {
|
||||
Commutativity::PartialCommutative => {
|
||||
// a small difference is that for partial commutative, we still need to
|
||||
// expand on next call(so `Limit` can be pushed down)
|
||||
self.expand_on_next_part_cond_trans_commutative = false;
|
||||
self.expand_on_next_call = true;
|
||||
}
|
||||
Commutativity::ConditionalCommutative(_)
|
||||
| Commutativity::TransformedCommutative { .. } => {
|
||||
// for conditional commutative and transformed commutative, we can
|
||||
// expand now
|
||||
self.expand_on_next_part_cond_trans_commutative = false;
|
||||
return true;
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
}
|
||||
|
||||
match Categorizer::check_plan(plan, self.partition_cols.clone()) {
|
||||
Commutativity::Commutative => {}
|
||||
Commutativity::PartialCommutative => {
|
||||
if let Some(plan) = partial_commutative_transformer(plan) {
|
||||
self.update_column_requirements(&plan);
|
||||
self.expand_on_next_part_cond_trans_commutative = true;
|
||||
self.stage.push(plan)
|
||||
}
|
||||
}
|
||||
@@ -194,6 +233,7 @@ impl PlanRewriter {
|
||||
&& let Some(plan) = transformer(plan)
|
||||
{
|
||||
self.update_column_requirements(&plan);
|
||||
self.expand_on_next_part_cond_trans_commutative = true;
|
||||
self.stage.push(plan)
|
||||
}
|
||||
}
|
||||
@@ -202,7 +242,7 @@ impl PlanRewriter {
|
||||
&& let Some(transformer_actions) = transformer(plan)
|
||||
{
|
||||
debug!(
|
||||
"PlanRewriter: transformed plan: {:#?}\n from {plan}",
|
||||
"PlanRewriter: transformed plan: {:?}\n from {plan}",
|
||||
transformer_actions.extra_parent_plans
|
||||
);
|
||||
if let Some(last_stage) = transformer_actions.extra_parent_plans.last() {
|
||||
@@ -226,6 +266,10 @@ impl PlanRewriter {
|
||||
}
|
||||
|
||||
fn update_column_requirements(&mut self, plan: &LogicalPlan) {
|
||||
debug!(
|
||||
"PlanRewriter: update column requirements for plan: {plan}\n withcolumn_requirements: {:?}",
|
||||
self.column_requirements
|
||||
);
|
||||
let mut container = HashSet::new();
|
||||
for expr in plan.expressions() {
|
||||
// this method won't fail
|
||||
@@ -235,6 +279,10 @@ impl PlanRewriter {
|
||||
for col in container {
|
||||
self.column_requirements.insert(col);
|
||||
}
|
||||
debug!(
|
||||
"PlanRewriter: updated column requirements: {:?}",
|
||||
self.column_requirements
|
||||
);
|
||||
}
|
||||
|
||||
fn is_expanded(&self) -> bool {
|
||||
|
||||
@@ -716,17 +716,19 @@ impl PromPlanner {
|
||||
..
|
||||
} = vs;
|
||||
let matchers = self.preprocess_label_matchers(matchers, name)?;
|
||||
if let Some(empty_plan) = self.setup_context().await? {
|
||||
return Ok(empty_plan);
|
||||
}
|
||||
|
||||
ensure!(!range.is_zero(), ZeroRangeSelectorSnafu);
|
||||
let range_ms = range.as_millis() as _;
|
||||
self.ctx.range = Some(range_ms);
|
||||
|
||||
let normalize = self
|
||||
.selector_to_series_normalize_plan(offset, matchers, true)
|
||||
.await?;
|
||||
// Some functions like rate may require special fields in the RangeManipulate plan
|
||||
// so we can't skip RangeManipulate.
|
||||
let normalize = match self.setup_context().await? {
|
||||
Some(empty_plan) => empty_plan,
|
||||
None => {
|
||||
self.selector_to_series_normalize_plan(offset, matchers, true)
|
||||
.await?
|
||||
}
|
||||
};
|
||||
let manipulate = RangeManipulate::new(
|
||||
self.ctx.start,
|
||||
self.ctx.end,
|
||||
|
||||
@@ -125,8 +125,10 @@ tonic.workspace = true
|
||||
tonic-reflection = "0.12"
|
||||
tower = { workspace = true, features = ["full"] }
|
||||
tower-http = { version = "0.6", features = ["full"] }
|
||||
tracing.workspace = true
|
||||
urlencoding = "2.1"
|
||||
uuid.workspace = true
|
||||
vrl.workspace = true
|
||||
zstd.workspace = true
|
||||
|
||||
[target.'cfg(not(windows))'.dependencies]
|
||||
|
||||
@@ -12,6 +12,7 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::collections::BTreeMap;
|
||||
use std::sync::Arc;
|
||||
use std::time::Instant;
|
||||
|
||||
@@ -30,9 +31,10 @@ use pipeline::{
|
||||
use serde_json::{json, Deserializer, Value};
|
||||
use session::context::{Channel, QueryContext};
|
||||
use snafu::{ensure, ResultExt};
|
||||
use vrl::value::Value as VrlValue;
|
||||
|
||||
use crate::error::{
|
||||
status_code_to_http_status, InvalidElasticsearchInputSnafu, ParseJsonSnafu, PipelineSnafu,
|
||||
status_code_to_http_status, InvalidElasticsearchInputSnafu, ParseJsonSnafu,
|
||||
Result as ServersResult,
|
||||
};
|
||||
use crate::http::event::{
|
||||
@@ -287,8 +289,8 @@ fn parse_bulk_request(
|
||||
msg_field: &Option<String>,
|
||||
) -> ServersResult<Vec<PipelineIngestRequest>> {
|
||||
// Read the ndjson payload and convert it to `Vec<Value>`. Return error if the input is not a valid JSON.
|
||||
let values: Vec<Value> = Deserializer::from_str(input)
|
||||
.into_iter::<Value>()
|
||||
let values: Vec<VrlValue> = Deserializer::from_str(input)
|
||||
.into_iter::<VrlValue>()
|
||||
.collect::<Result<_, _>>()
|
||||
.context(ParseJsonSnafu)?;
|
||||
|
||||
@@ -307,12 +309,13 @@ fn parse_bulk_request(
|
||||
// For Elasticsearch post `_bulk` API, each chunk contains two objects:
|
||||
// 1. The first object is the command, it should be `create` or `index`.
|
||||
// 2. The second object is the document data.
|
||||
while let Some(mut cmd) = values.next() {
|
||||
while let Some(cmd) = values.next() {
|
||||
// NOTE: Although the native Elasticsearch API supports upsert in `index` command, we don't support change any data in `index` command and it's same as `create` command.
|
||||
let index = if let Some(cmd) = cmd.get_mut("create") {
|
||||
get_index_from_cmd(cmd.take())?
|
||||
} else if let Some(cmd) = cmd.get_mut("index") {
|
||||
get_index_from_cmd(cmd.take())?
|
||||
let mut cmd = cmd.into_object();
|
||||
let index = if let Some(cmd) = cmd.as_mut().and_then(|c| c.remove("create")) {
|
||||
get_index_from_cmd(cmd)?
|
||||
} else if let Some(cmd) = cmd.as_mut().and_then(|c| c.remove("index")) {
|
||||
get_index_from_cmd(cmd)?
|
||||
} else {
|
||||
return InvalidElasticsearchInputSnafu {
|
||||
reason: format!(
|
||||
@@ -339,7 +342,6 @@ fn parse_bulk_request(
|
||||
}
|
||||
);
|
||||
|
||||
let log_value = pipeline::json_to_map(log_value).context(PipelineSnafu)?;
|
||||
requests.push(PipelineIngestRequest {
|
||||
table: index.unwrap_or_else(|| index_from_url.as_ref().unwrap().clone()),
|
||||
values: vec![log_value],
|
||||
@@ -357,39 +359,50 @@ fn parse_bulk_request(
|
||||
}
|
||||
|
||||
// Get the index from the command. We will take index as the table name in GreptimeDB.
|
||||
fn get_index_from_cmd(mut v: Value) -> ServersResult<Option<String>> {
|
||||
if let Some(index) = v.get_mut("_index") {
|
||||
if let Value::String(index) = index.take() {
|
||||
Ok(Some(index))
|
||||
} else {
|
||||
// If the `_index` exists, it should be a string.
|
||||
InvalidElasticsearchInputSnafu {
|
||||
reason: "index is not a string in bulk request".to_string(),
|
||||
}
|
||||
.fail()
|
||||
}
|
||||
fn get_index_from_cmd(v: VrlValue) -> ServersResult<Option<String>> {
|
||||
let Some(index) = v.into_object().and_then(|mut m| m.remove("_index")) else {
|
||||
return Ok(None);
|
||||
};
|
||||
|
||||
if let VrlValue::Bytes(index) = index {
|
||||
Ok(Some(String::from_utf8_lossy(&index).to_string()))
|
||||
} else {
|
||||
Ok(None)
|
||||
// If the `_index` exists, it should be a string.
|
||||
InvalidElasticsearchInputSnafu {
|
||||
reason: "index is not a string in bulk request",
|
||||
}
|
||||
.fail()
|
||||
}
|
||||
}
|
||||
|
||||
// If the msg_field is provided, fetch the value of the field from the document data.
|
||||
// For example, if the `msg_field` is `message`, and the document data is `{"message":"hello"}`, the log value will be Value::String("hello").
|
||||
fn get_log_value_from_msg_field(mut v: Value, msg_field: &str) -> Value {
|
||||
if let Some(message) = v.get_mut(msg_field) {
|
||||
let message = message.take();
|
||||
fn get_log_value_from_msg_field(v: VrlValue, msg_field: &str) -> VrlValue {
|
||||
let VrlValue::Object(mut m) = v else {
|
||||
return v;
|
||||
};
|
||||
|
||||
if let Some(message) = m.remove(msg_field) {
|
||||
match message {
|
||||
Value::String(s) => match serde_json::from_str::<Value>(&s) {
|
||||
Ok(s) => s,
|
||||
// If the message is not a valid JSON, return a map with the original message key and value.
|
||||
Err(_) => json!({msg_field: s}),
|
||||
},
|
||||
VrlValue::Bytes(bytes) => {
|
||||
match serde_json::from_slice::<VrlValue>(&bytes) {
|
||||
Ok(v) => v,
|
||||
// If the message is not a valid JSON, return a map with the original message key and value.
|
||||
Err(_) => {
|
||||
let map = BTreeMap::from([(
|
||||
msg_field.to_string().into(),
|
||||
VrlValue::Bytes(bytes),
|
||||
)]);
|
||||
VrlValue::Object(map)
|
||||
}
|
||||
}
|
||||
}
|
||||
// If the message is not a string, just use the original message as the log value.
|
||||
_ => message,
|
||||
}
|
||||
} else {
|
||||
// If the msg_field is not found, just use the original message as the log value.
|
||||
v
|
||||
VrlValue::Object(m)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -414,12 +427,14 @@ mod tests {
|
||||
PipelineIngestRequest {
|
||||
table: "test".to_string(),
|
||||
values: vec![
|
||||
pipeline::json_to_map(json!({"foo1": "foo1_value", "bar1": "bar1_value"})).unwrap(),
|
||||
json!({"foo1": "foo1_value", "bar1": "bar1_value"}).into(),
|
||||
],
|
||||
},
|
||||
PipelineIngestRequest {
|
||||
table: "test".to_string(),
|
||||
values: vec![pipeline::json_to_map(json!({"foo2": "foo2_value", "bar2": "bar2_value"})).unwrap()],
|
||||
values: vec![
|
||||
json!({"foo2": "foo2_value", "bar2": "bar2_value"}).into(),
|
||||
],
|
||||
},
|
||||
]),
|
||||
),
|
||||
@@ -436,11 +451,15 @@ mod tests {
|
||||
Ok(vec![
|
||||
PipelineIngestRequest {
|
||||
table: "test".to_string(),
|
||||
values: vec![pipeline::json_to_map(json!({"foo1": "foo1_value", "bar1": "bar1_value"})).unwrap()],
|
||||
values: vec![
|
||||
json!({"foo1": "foo1_value", "bar1": "bar1_value"}).into(),
|
||||
],
|
||||
},
|
||||
PipelineIngestRequest {
|
||||
table: "logs".to_string(),
|
||||
values: vec![pipeline::json_to_map(json!({"foo2": "foo2_value", "bar2": "bar2_value"})).unwrap()],
|
||||
values: vec![
|
||||
json!({"foo2": "foo2_value", "bar2": "bar2_value"}).into(),
|
||||
],
|
||||
},
|
||||
]),
|
||||
),
|
||||
@@ -457,11 +476,15 @@ mod tests {
|
||||
Ok(vec![
|
||||
PipelineIngestRequest {
|
||||
table: "test".to_string(),
|
||||
values: vec![pipeline::json_to_map(json!({"foo1": "foo1_value", "bar1": "bar1_value"})).unwrap()],
|
||||
values: vec![
|
||||
json!({"foo1": "foo1_value", "bar1": "bar1_value"}).into(),
|
||||
],
|
||||
},
|
||||
PipelineIngestRequest {
|
||||
table: "logs".to_string(),
|
||||
values: vec![pipeline::json_to_map(json!({"foo2": "foo2_value", "bar2": "bar2_value"})).unwrap()],
|
||||
values: vec![
|
||||
json!({"foo2": "foo2_value", "bar2": "bar2_value"}).into(),
|
||||
],
|
||||
},
|
||||
]),
|
||||
),
|
||||
@@ -477,7 +500,9 @@ mod tests {
|
||||
Ok(vec![
|
||||
PipelineIngestRequest {
|
||||
table: "test".to_string(),
|
||||
values: vec![pipeline::json_to_map(json!({"foo1": "foo1_value", "bar1": "bar1_value"})).unwrap()],
|
||||
values: vec![
|
||||
json!({"foo1": "foo1_value", "bar1": "bar1_value"}).into(),
|
||||
],
|
||||
},
|
||||
]),
|
||||
),
|
||||
@@ -494,11 +519,15 @@ mod tests {
|
||||
Ok(vec![
|
||||
PipelineIngestRequest {
|
||||
table: "test".to_string(),
|
||||
values: vec![pipeline::json_to_map(json!({"foo1": "foo1_value", "bar1": "bar1_value"})).unwrap()],
|
||||
values: vec![
|
||||
json!({"foo1": "foo1_value", "bar1": "bar1_value"}).into(),
|
||||
],
|
||||
},
|
||||
PipelineIngestRequest {
|
||||
table: "test".to_string(),
|
||||
values: vec![pipeline::json_to_map(json!({"foo2": "foo2_value", "bar2": "bar2_value"})).unwrap()],
|
||||
values: vec![
|
||||
json!({"foo2": "foo2_value", "bar2": "bar2_value"}).into(),
|
||||
],
|
||||
},
|
||||
]),
|
||||
),
|
||||
@@ -516,13 +545,13 @@ mod tests {
|
||||
PipelineIngestRequest {
|
||||
table: "logs-generic-default".to_string(),
|
||||
values: vec![
|
||||
pipeline::json_to_map(json!({"message": "172.16.0.1 - - [25/May/2024:20:19:37 +0000] \"GET /contact HTTP/1.1\" 404 162 \"-\" \"Mozilla/5.0 (iPhone; CPU iPhone OS 14_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0 Mobile/15E148 Safari/604.1\""})).unwrap(),
|
||||
json!({"message": "172.16.0.1 - - [25/May/2024:20:19:37 +0000] \"GET /contact HTTP/1.1\" 404 162 \"-\" \"Mozilla/5.0 (iPhone; CPU iPhone OS 14_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0 Mobile/15E148 Safari/604.1\""}).into(),
|
||||
],
|
||||
},
|
||||
PipelineIngestRequest {
|
||||
table: "logs-generic-default".to_string(),
|
||||
values: vec![
|
||||
pipeline::json_to_map(json!({"message": "10.0.0.1 - - [25/May/2024:20:18:37 +0000] \"GET /images/logo.png HTTP/1.1\" 304 0 \"-\" \"Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:89.0) Gecko/20100101 Firefox/89.0\""})).unwrap(),
|
||||
json!({"message": "10.0.0.1 - - [25/May/2024:20:18:37 +0000] \"GET /images/logo.png HTTP/1.1\" 304 0 \"-\" \"Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:89.0) Gecko/20100101 Firefox/89.0\""}).into(),
|
||||
],
|
||||
},
|
||||
]),
|
||||
|
||||
@@ -269,7 +269,9 @@ pub async fn write_system_metric_by_handler(
|
||||
if let Err(e) = handler.write(requests, ctx.clone(), false).await {
|
||||
error!(e; "report export metrics by handler failed");
|
||||
} else {
|
||||
crate::metrics::PROM_STORE_REMOTE_WRITE_SAMPLES.inc_by(samples as u64);
|
||||
crate::metrics::PROM_STORE_REMOTE_WRITE_SAMPLES
|
||||
.with_label_values(&[ctx.get_db_string().as_str()])
|
||||
.inc_by(samples as u64);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -35,14 +35,14 @@ use headers::ContentType;
|
||||
use lazy_static::lazy_static;
|
||||
use mime_guess::mime;
|
||||
use pipeline::util::to_pipeline_version;
|
||||
use pipeline::{
|
||||
ContextReq, GreptimePipelineParams, PipelineContext, PipelineDefinition, Value as PipelineValue,
|
||||
};
|
||||
use pipeline::{ContextReq, GreptimePipelineParams, PipelineContext, PipelineDefinition};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_json::{json, Deserializer, Map, Value as JsonValue};
|
||||
use session::context::{Channel, QueryContext, QueryContextRef};
|
||||
use simd_json::Buffers;
|
||||
use snafu::{ensure, OptionExt, ResultExt};
|
||||
use strum::{EnumIter, IntoEnumIterator};
|
||||
use vrl::value::{KeyString, Value as VrlValue};
|
||||
|
||||
use crate::error::{
|
||||
status_code_to_http_status, Error, InvalidParameterSnafu, ParseJsonSnafu, PipelineSnafu, Result,
|
||||
@@ -117,7 +117,7 @@ pub(crate) struct PipelineIngestRequest {
|
||||
/// The table where the log data will be written to.
|
||||
pub table: String,
|
||||
/// The log data to be ingested.
|
||||
pub values: Vec<PipelineValue>,
|
||||
pub values: Vec<VrlValue>,
|
||||
}
|
||||
|
||||
pub struct PipelineContent(String);
|
||||
@@ -295,18 +295,18 @@ pub async fn delete_pipeline(
|
||||
/// Transform NDJSON array into a single array
/// Always returns an array
|
||||
fn transform_ndjson_array_factory(
|
||||
values: impl IntoIterator<Item = Result<JsonValue, serde_json::Error>>,
|
||||
values: impl IntoIterator<Item = Result<VrlValue, serde_json::Error>>,
|
||||
ignore_error: bool,
|
||||
) -> Result<Vec<JsonValue>> {
|
||||
) -> Result<Vec<VrlValue>> {
|
||||
values
|
||||
.into_iter()
|
||||
.try_fold(Vec::with_capacity(100), |mut acc_array, item| match item {
|
||||
Ok(item_value) => {
|
||||
match item_value {
|
||||
JsonValue::Array(item_array) => {
|
||||
VrlValue::Array(item_array) => {
|
||||
acc_array.extend(item_array);
|
||||
}
|
||||
JsonValue::Object(_) => {
|
||||
VrlValue::Object(_) => {
|
||||
acc_array.push(item_value);
|
||||
}
|
||||
_ => {
|
||||
@@ -331,7 +331,7 @@ fn transform_ndjson_array_factory(
|
||||
|
||||
/// Dryrun pipeline with given data
|
||||
async fn dryrun_pipeline_inner(
|
||||
value: Vec<PipelineValue>,
|
||||
value: Vec<VrlValue>,
|
||||
pipeline: Arc<pipeline::Pipeline>,
|
||||
pipeline_handler: PipelineHandlerRef,
|
||||
query_ctx: &QueryContextRef,
|
||||
@@ -494,7 +494,7 @@ fn add_step_info_for_pipeline_dryrun_error(step_msg: &str, e: Error) -> Response
|
||||
/// Parse the data with given content type
|
||||
/// If the content type is invalid, return error
|
||||
/// content type is one of application/json, text/plain, application/x-ndjson
|
||||
fn parse_dryrun_data(data_type: String, data: String) -> Result<Vec<PipelineValue>> {
|
||||
fn parse_dryrun_data(data_type: String, data: String) -> Result<Vec<VrlValue>> {
|
||||
if let Ok(content_type) = ContentType::from_str(&data_type) {
|
||||
extract_pipeline_value_by_content_type(content_type, Bytes::from(data), false)
|
||||
} else {
|
||||
@@ -741,17 +741,15 @@ impl<'a> TryFrom<&'a ContentType> for EventPayloadResolver<'a> {
|
||||
}
|
||||
|
||||
impl EventPayloadResolver<'_> {
|
||||
fn parse_payload(&self, payload: Bytes, ignore_errors: bool) -> Result<Vec<PipelineValue>> {
|
||||
fn parse_payload(&self, payload: Bytes, ignore_errors: bool) -> Result<Vec<VrlValue>> {
|
||||
match self.inner {
|
||||
EventPayloadResolverInner::Json => {
|
||||
pipeline::json_array_to_map(transform_ndjson_array_factory(
|
||||
Deserializer::from_slice(&payload).into_iter(),
|
||||
ignore_errors,
|
||||
)?)
|
||||
.context(PipelineSnafu)
|
||||
}
|
||||
EventPayloadResolverInner::Json => transform_ndjson_array_factory(
|
||||
Deserializer::from_slice(&payload).into_iter(),
|
||||
ignore_errors,
|
||||
),
|
||||
EventPayloadResolverInner::Ndjson => {
|
||||
let mut result = Vec::with_capacity(1000);
|
||||
let mut buffer = Buffers::new(1000);
|
||||
for (index, line) in payload.lines().enumerate() {
|
||||
let mut line = match line {
|
||||
Ok(line) if !line.is_empty() => line,
|
||||
@@ -768,8 +766,10 @@ impl EventPayloadResolver<'_> {
|
||||
|
||||
// simd_json, according to its documentation, only de-escapes strings at the character level,
// like any other JSON parser, so mutating the line buffer in place here is safe.
|
||||
if let Ok(v) = simd_json::to_owned_value(unsafe { line.as_bytes_mut() }) {
|
||||
let v = pipeline::simd_json_to_map(v).context(PipelineSnafu)?;
|
||||
if let Ok(v) = simd_json::serde::from_slice_with_buffers(
|
||||
unsafe { line.as_bytes_mut() },
|
||||
&mut buffer,
|
||||
) {
|
||||
result.push(v);
|
||||
} else if !ignore_errors {
|
||||
warn!("invalid JSON at index: {}, content: {:?}", index, line);
|
||||
@@ -787,8 +787,11 @@ impl EventPayloadResolver<'_> {
|
||||
.filter_map(|line| line.ok().filter(|line| !line.is_empty()))
|
||||
.map(|line| {
|
||||
let mut map = BTreeMap::new();
|
||||
map.insert("message".to_string(), PipelineValue::String(line));
|
||||
PipelineValue::Map(map.into())
|
||||
map.insert(
|
||||
KeyString::from("message"),
|
||||
VrlValue::Bytes(Bytes::from(line)),
|
||||
);
|
||||
VrlValue::Object(map)
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
Ok(result)
|
||||
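As a rough, hypothetical stand-alone variant of the NDJSON branch above (assuming the same simd_json and VrlValue APIs the diff imports): one scratch `Buffers` is reused across lines instead of allocating per line, and invalid lines are silently skipped, whereas the real code honours `ignore_errors` and logs a warning.

use simd_json::Buffers;
use vrl::value::Value as VrlValue;

// Parse newline-delimited JSON, reusing one scratch buffer across lines.
fn parse_ndjson_lines(payload: &str) -> Vec<VrlValue> {
    let mut buffers = Buffers::new(1024);
    payload
        .lines()
        .filter(|line| !line.is_empty())
        .filter_map(|line| {
            let mut bytes = line.as_bytes().to_vec();
            simd_json::serde::from_slice_with_buffers::<VrlValue>(&mut bytes, &mut buffers).ok()
        })
        .collect()
}

fn main() {
    let values = parse_ndjson_lines("{\"a\": 1}\nnot-json\n{\"b\": 2}");
    // The malformed middle line is dropped in this simplified sketch.
    assert_eq!(values.len(), 2);
}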
@@ -801,7 +804,7 @@ fn extract_pipeline_value_by_content_type(
|
||||
content_type: ContentType,
|
||||
payload: Bytes,
|
||||
ignore_errors: bool,
|
||||
) -> Result<Vec<PipelineValue>> {
|
||||
) -> Result<Vec<VrlValue>> {
|
||||
EventPayloadResolver::try_from(&content_type).and_then(|resolver| {
|
||||
resolver
|
||||
.parse_payload(payload, ignore_errors)
|
||||
@@ -899,36 +902,37 @@ pub struct LogState {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_transform_ndjson() {
|
||||
let s = "{\"a\": 1}\n{\"b\": 2}";
|
||||
let a = JsonValue::Array(
|
||||
transform_ndjson_array_factory(Deserializer::from_str(s).into_iter(), false).unwrap(),
|
||||
let a = serde_json::to_string(
|
||||
&transform_ndjson_array_factory(Deserializer::from_str(s).into_iter(), false).unwrap(),
|
||||
)
|
||||
.to_string();
|
||||
.unwrap();
|
||||
assert_eq!(a, "[{\"a\":1},{\"b\":2}]");
|
||||
|
||||
let s = "{\"a\": 1}";
|
||||
let a = JsonValue::Array(
|
||||
transform_ndjson_array_factory(Deserializer::from_str(s).into_iter(), false).unwrap(),
|
||||
let a = serde_json::to_string(
|
||||
&transform_ndjson_array_factory(Deserializer::from_str(s).into_iter(), false).unwrap(),
|
||||
)
|
||||
.to_string();
|
||||
.unwrap();
|
||||
assert_eq!(a, "[{\"a\":1}]");
|
||||
|
||||
let s = "[{\"a\": 1}]";
|
||||
let a = JsonValue::Array(
|
||||
transform_ndjson_array_factory(Deserializer::from_str(s).into_iter(), false).unwrap(),
|
||||
let a = serde_json::to_string(
|
||||
&transform_ndjson_array_factory(Deserializer::from_str(s).into_iter(), false).unwrap(),
|
||||
)
|
||||
.to_string();
|
||||
.unwrap();
|
||||
assert_eq!(a, "[{\"a\":1}]");
|
||||
|
||||
let s = "[{\"a\": 1}, {\"b\": 2}]";
|
||||
let a = JsonValue::Array(
|
||||
transform_ndjson_array_factory(Deserializer::from_str(s).into_iter(), false).unwrap(),
|
||||
let a = serde_json::to_string(
|
||||
&transform_ndjson_array_factory(Deserializer::from_str(s).into_iter(), false).unwrap(),
|
||||
)
|
||||
.to_string();
|
||||
.unwrap();
|
||||
assert_eq!(a, "[{\"a\":1},{\"b\":2}]");
|
||||
}
|
||||
|
||||
@@ -945,21 +949,18 @@ mod tests {
|
||||
let fail_rest =
|
||||
extract_pipeline_value_by_content_type(ContentType::json(), payload.clone(), true);
|
||||
assert!(fail_rest.is_ok());
|
||||
assert_eq!(
|
||||
fail_rest.unwrap(),
|
||||
pipeline::json_array_to_map(vec![json!({"a": 1})]).unwrap()
|
||||
);
|
||||
assert_eq!(fail_rest.unwrap(), vec![json!({"a": 1}).into()]);
|
||||
|
||||
let fail_only_wrong =
|
||||
extract_pipeline_value_by_content_type(NDJSON_CONTENT_TYPE.clone(), payload, true);
|
||||
assert!(fail_only_wrong.is_ok());
|
||||
|
||||
let mut map1 = BTreeMap::new();
|
||||
map1.insert("a".to_string(), PipelineValue::Uint64(1));
|
||||
let map1 = PipelineValue::Map(map1.into());
|
||||
map1.insert(KeyString::from("a"), VrlValue::Integer(1));
|
||||
let map1 = VrlValue::Object(map1);
|
||||
let mut map2 = BTreeMap::new();
|
||||
map2.insert("c".to_string(), PipelineValue::Uint64(1));
|
||||
let map2 = PipelineValue::Map(map2.into());
|
||||
map2.insert(KeyString::from("c"), VrlValue::Integer(1));
|
||||
let map2 = VrlValue::Object(map2);
|
||||
assert_eq!(fail_only_wrong.unwrap(), vec![map1, map2]);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -25,6 +25,7 @@ use axum::extract::State;
|
||||
use axum::Extension;
|
||||
use axum_extra::TypedHeader;
|
||||
use bytes::Bytes;
|
||||
use chrono::DateTime;
|
||||
use common_query::prelude::GREPTIME_TIMESTAMP;
|
||||
use common_query::{Output, OutputData};
|
||||
use common_telemetry::{error, warn};
|
||||
@@ -39,6 +40,7 @@ use prost::Message;
|
||||
use quoted_string::test_utils::TestSpec;
|
||||
use session::context::{Channel, QueryContext};
|
||||
use snafu::{ensure, OptionExt, ResultExt};
|
||||
use vrl::value::{KeyString, Value as VrlValue};
|
||||
|
||||
use crate::error::{
|
||||
DecodeOtlpRequestSnafu, InvalidLokiLabelsSnafu, InvalidLokiPayloadSnafu, ParseJsonSnafu,
|
||||
@@ -197,7 +199,7 @@ pub async fn loki_ingest(
|
||||
}
|
||||
|
||||
/// This is the holder of the loki lines parsed from json or protobuf.
|
||||
/// The generic here is either [serde_json::Value] or [Vec<LabelPairAdapter>].
|
||||
/// The generic here is either [VrlValue] or [Vec<LabelPairAdapter>].
|
||||
/// Depending on the target destination, this can be converted to [LokiRawItem] or [LokiPipeline].
|
||||
pub struct LokiMiddleItem<T> {
|
||||
pub ts: i64,
|
||||
@@ -218,7 +220,7 @@ pub struct LokiRawItem {
|
||||
|
||||
/// This is the line item prepared for the pipeline engine.
|
||||
pub struct LokiPipeline {
|
||||
pub map: pipeline::Value,
|
||||
pub map: VrlValue,
|
||||
}
|
||||
|
||||
/// This is the flow of the Loki ingestion.
|
||||
@@ -255,7 +257,7 @@ pub struct LokiPipeline {
|
||||
/// +------------------+ +---------------------+
|
||||
fn extract_item<T>(content_type: ContentType, bytes: Bytes) -> Result<Box<dyn Iterator<Item = T>>>
|
||||
where
|
||||
LokiMiddleItem<serde_json::Value>: Into<T>,
|
||||
LokiMiddleItem<VrlValue>: Into<T>,
|
||||
LokiMiddleItem<Vec<LabelPairAdapter>>: Into<T>,
|
||||
{
|
||||
match content_type {
|
||||
@@ -270,15 +272,14 @@ where
|
||||
}
|
||||
|
||||
struct LokiJsonParser {
|
||||
pub streams: VecDeque<serde_json::Value>,
|
||||
pub streams: VecDeque<VrlValue>,
|
||||
}
|
||||
|
||||
impl LokiJsonParser {
|
||||
pub fn from_bytes(bytes: Bytes) -> Result<Self> {
|
||||
let payload: serde_json::Value =
|
||||
serde_json::from_slice(bytes.as_ref()).context(ParseJsonSnafu)?;
|
||||
let payload: VrlValue = serde_json::from_slice(bytes.as_ref()).context(ParseJsonSnafu)?;
|
||||
|
||||
let serde_json::Value::Object(mut map) = payload else {
|
||||
let VrlValue::Object(mut map) = payload else {
|
||||
return InvalidLokiPayloadSnafu {
|
||||
msg: "payload is not an object",
|
||||
}
|
||||
@@ -289,7 +290,7 @@ impl LokiJsonParser {
|
||||
msg: "missing streams",
|
||||
})?;
|
||||
|
||||
let serde_json::Value::Array(streams) = streams else {
|
||||
let VrlValue::Array(streams) = streams else {
|
||||
return InvalidLokiPayloadSnafu {
|
||||
msg: "streams is not an array",
|
||||
}
|
||||
@@ -308,7 +309,7 @@ impl Iterator for LokiJsonParser {
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
while let Some(stream) = self.streams.pop_front() {
|
||||
// get lines from the map
|
||||
let serde_json::Value::Object(mut map) = stream else {
|
||||
let VrlValue::Object(mut map) = stream else {
|
||||
warn!("stream is not an object, {:?}", stream);
|
||||
continue;
|
||||
};
|
||||
@@ -316,7 +317,7 @@ impl Iterator for LokiJsonParser {
|
||||
warn!("missing lines on stream, {:?}", map);
|
||||
continue;
|
||||
};
|
||||
let serde_json::Value::Array(lines) = lines else {
|
||||
let VrlValue::Array(lines) = lines else {
|
||||
warn!("lines is not an array, {:?}", lines);
|
||||
continue;
|
||||
};
|
||||
@@ -325,13 +326,15 @@ impl Iterator for LokiJsonParser {
|
||||
let labels = map
|
||||
.remove(LABEL_KEY)
|
||||
.and_then(|m| match m {
|
||||
serde_json::Value::Object(labels) => Some(labels),
|
||||
VrlValue::Object(labels) => Some(labels),
|
||||
_ => None,
|
||||
})
|
||||
.map(|m| {
|
||||
m.into_iter()
|
||||
.filter_map(|(k, v)| match v {
|
||||
serde_json::Value::String(v) => Some((k, v)),
|
||||
VrlValue::Bytes(v) => {
|
||||
Some((k.into(), String::from_utf8_lossy(&v).to_string()))
|
||||
}
|
||||
_ => None,
|
||||
})
|
||||
.collect::<BTreeMap<String, String>>()
|
||||
@@ -347,16 +350,16 @@ impl Iterator for LokiJsonParser {
|
||||
}
|
||||
|
||||
struct JsonStreamItem {
|
||||
pub lines: VecDeque<serde_json::Value>,
|
||||
pub lines: VecDeque<VrlValue>,
|
||||
pub labels: Option<BTreeMap<String, String>>,
|
||||
}
|
||||
|
||||
impl Iterator for JsonStreamItem {
|
||||
type Item = LokiMiddleItem<serde_json::Value>;
|
||||
type Item = LokiMiddleItem<VrlValue>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
while let Some(line) = self.lines.pop_front() {
|
||||
let serde_json::Value::Array(line) = line else {
|
||||
let VrlValue::Array(line) = line else {
|
||||
warn!("line is not an array, {:?}", line);
|
||||
continue;
|
||||
};
|
||||
@@ -364,11 +367,11 @@ impl Iterator for JsonStreamItem {
|
||||
warn!("line is too short, {:?}", line);
|
||||
continue;
|
||||
}
|
||||
let mut line: VecDeque<serde_json::Value> = line.into();
|
||||
let mut line: VecDeque<VrlValue> = line.into();
|
||||
|
||||
// get ts
|
||||
let ts = line.pop_front().and_then(|ts| match ts {
|
||||
serde_json::Value::String(ts) => ts.parse::<i64>().ok(),
|
||||
VrlValue::Bytes(ts) => String::from_utf8_lossy(&ts).parse::<i64>().ok(),
|
||||
_ => {
|
||||
warn!("missing or invalid timestamp, {:?}", ts);
|
||||
None
|
||||
@@ -379,7 +382,7 @@ impl Iterator for JsonStreamItem {
|
||||
};
|
||||
|
||||
let line_text = line.pop_front().and_then(|l| match l {
|
||||
serde_json::Value::String(l) => Some(l),
|
||||
VrlValue::Bytes(l) => Some(String::from_utf8_lossy(&l).to_string()),
|
||||
_ => {
|
||||
warn!("missing or invalid line, {:?}", l);
|
||||
None
|
||||
@@ -402,8 +405,8 @@ impl Iterator for JsonStreamItem {
|
||||
}
|
||||
}
|
||||
|
||||
impl From<LokiMiddleItem<serde_json::Value>> for LokiRawItem {
|
||||
fn from(val: LokiMiddleItem<serde_json::Value>) -> Self {
|
||||
impl From<LokiMiddleItem<VrlValue>> for LokiRawItem {
|
||||
fn from(val: LokiMiddleItem<VrlValue>) -> Self {
|
||||
let LokiMiddleItem {
|
||||
ts,
|
||||
line,
|
||||
@@ -413,13 +416,16 @@ impl From<LokiMiddleItem<serde_json::Value>> for LokiRawItem {
|
||||
|
||||
let structured_metadata = structured_metadata
|
||||
.and_then(|m| match m {
|
||||
serde_json::Value::Object(m) => Some(m),
|
||||
VrlValue::Object(m) => Some(m),
|
||||
_ => None,
|
||||
})
|
||||
.map(|m| {
|
||||
m.into_iter()
|
||||
.filter_map(|(k, v)| match v {
|
||||
serde_json::Value::String(v) => Some((k, Value::String(v.into()))),
|
||||
VrlValue::Bytes(bytes) => Some((
|
||||
k.into(),
|
||||
Value::String(String::from_utf8_lossy(&bytes).to_string().into()),
|
||||
)),
|
||||
_ => None,
|
||||
})
|
||||
.collect::<BTreeMap<String, Value>>()
|
||||
@@ -436,8 +442,8 @@ impl From<LokiMiddleItem<serde_json::Value>> for LokiRawItem {
|
||||
}
|
||||
}
|
||||
|
||||
impl From<LokiMiddleItem<serde_json::Value>> for LokiPipeline {
|
||||
fn from(value: LokiMiddleItem<serde_json::Value>) -> Self {
|
||||
impl From<LokiMiddleItem<VrlValue>> for LokiPipeline {
|
||||
fn from(value: LokiMiddleItem<VrlValue>) -> Self {
|
||||
let LokiMiddleItem {
|
||||
ts,
|
||||
line,
|
||||
@@ -447,37 +453,33 @@ impl From<LokiMiddleItem<serde_json::Value>> for LokiPipeline {
|
||||
|
||||
let mut map = BTreeMap::new();
|
||||
map.insert(
|
||||
GREPTIME_TIMESTAMP.to_string(),
|
||||
pipeline::Value::Timestamp(pipeline::Timestamp::Nanosecond(ts)),
|
||||
KeyString::from(GREPTIME_TIMESTAMP),
|
||||
VrlValue::Timestamp(DateTime::from_timestamp_nanos(ts)),
|
||||
);
|
||||
map.insert(
|
||||
LOKI_LINE_COLUMN_NAME.to_string(),
|
||||
pipeline::Value::String(line),
|
||||
KeyString::from(LOKI_LINE_COLUMN_NAME),
|
||||
VrlValue::Bytes(line.into()),
|
||||
);
|
||||
|
||||
if let Some(serde_json::Value::Object(m)) = structured_metadata {
|
||||
if let Some(VrlValue::Object(m)) = structured_metadata {
|
||||
for (k, v) in m {
|
||||
match pipeline::Value::try_from(v) {
|
||||
Ok(v) => {
|
||||
map.insert(format!("{}{}", LOKI_PIPELINE_METADATA_PREFIX, k), v);
|
||||
}
|
||||
Err(e) => {
|
||||
warn!("not a valid value, {:?}", e);
|
||||
}
|
||||
}
|
||||
map.insert(
|
||||
KeyString::from(format!("{}{}", LOKI_PIPELINE_METADATA_PREFIX, k)),
|
||||
v,
|
||||
);
|
||||
}
|
||||
}
|
||||
if let Some(v) = labels {
|
||||
v.into_iter().for_each(|(k, v)| {
|
||||
map.insert(
|
||||
format!("{}{}", LOKI_PIPELINE_LABEL_PREFIX, k),
|
||||
pipeline::Value::String(v),
|
||||
KeyString::from(format!("{}{}", LOKI_PIPELINE_LABEL_PREFIX, k)),
|
||||
VrlValue::Bytes(v.into()),
|
||||
);
|
||||
});
|
||||
}
|
||||
|
||||
LokiPipeline {
|
||||
map: pipeline::Value::Map(pipeline::Map::from(map)),
|
||||
map: VrlValue::Object(map),
|
||||
}
|
||||
}
|
||||
}
|
||||
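A hedged sketch of the row map a Loki pipeline item is turned into above: a VRL timestamp, the raw line as bytes, and labels copied in under a prefix. The column-name constants here are stand-ins for the crate's real constants, and the values are made up.

use std::collections::BTreeMap;

use chrono::DateTime;
use vrl::value::{KeyString, Value as VrlValue};

fn main() {
    // Stand-in names; the real constants live in the server crate.
    const TS_COL: &str = "greptime_timestamp";
    const LINE_COL: &str = "loki_line";
    const LABEL_PREFIX: &str = "loki_label_";

    let ts_nanos: i64 = 1_716_668_377_000_000_000;
    let mut map = BTreeMap::new();
    map.insert(
        KeyString::from(TS_COL),
        VrlValue::Timestamp(DateTime::from_timestamp_nanos(ts_nanos)),
    );
    map.insert(KeyString::from(LINE_COL), VrlValue::Bytes("hello".into()));
    map.insert(
        KeyString::from(format!("{LABEL_PREFIX}service")),
        VrlValue::Bytes("api".into()),
    );
    // This object is what gets handed to the pipeline engine as one row.
    let _row = VrlValue::Object(map);
}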
@@ -584,12 +586,12 @@ impl From<LokiMiddleItem<Vec<LabelPairAdapter>>> for LokiPipeline {
|
||||
|
||||
let mut map = BTreeMap::new();
|
||||
map.insert(
|
||||
GREPTIME_TIMESTAMP.to_string(),
|
||||
pipeline::Value::Timestamp(pipeline::Timestamp::Nanosecond(ts)),
|
||||
KeyString::from(GREPTIME_TIMESTAMP),
|
||||
VrlValue::Timestamp(DateTime::from_timestamp_nanos(ts)),
|
||||
);
|
||||
map.insert(
|
||||
LOKI_LINE_COLUMN_NAME.to_string(),
|
||||
pipeline::Value::String(line),
|
||||
KeyString::from(LOKI_LINE_COLUMN_NAME),
|
||||
VrlValue::Bytes(line.into()),
|
||||
);
|
||||
|
||||
structured_metadata
|
||||
@@ -597,22 +599,22 @@ impl From<LokiMiddleItem<Vec<LabelPairAdapter>>> for LokiPipeline {
|
||||
.into_iter()
|
||||
.for_each(|d| {
|
||||
map.insert(
|
||||
format!("{}{}", LOKI_PIPELINE_METADATA_PREFIX, d.name),
|
||||
pipeline::Value::String(d.value),
|
||||
KeyString::from(format!("{}{}", LOKI_PIPELINE_METADATA_PREFIX, d.name)),
|
||||
VrlValue::Bytes(d.value.into()),
|
||||
);
|
||||
});
|
||||
|
||||
if let Some(v) = labels {
|
||||
v.into_iter().for_each(|(k, v)| {
|
||||
map.insert(
|
||||
format!("{}{}", LOKI_PIPELINE_LABEL_PREFIX, k),
|
||||
pipeline::Value::String(v),
|
||||
KeyString::from(format!("{}{}", LOKI_PIPELINE_LABEL_PREFIX, k)),
|
||||
VrlValue::Bytes(v.into()),
|
||||
);
|
||||
});
|
||||
}
|
||||
|
||||
LokiPipeline {
|
||||
map: pipeline::Value::Map(pipeline::Map::from(map)),
|
||||
map: VrlValue::Object(map),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -145,7 +145,9 @@ pub async fn remote_write(
|
||||
let output = prom_store_handler
|
||||
.write(reqs, temp_ctx, prom_store_with_metric_engine)
|
||||
.await?;
|
||||
crate::metrics::PROM_STORE_REMOTE_WRITE_SAMPLES.inc_by(cnt);
|
||||
crate::metrics::PROM_STORE_REMOTE_WRITE_SAMPLES
|
||||
.with_label_values(&[db.as_str()])
|
||||
.inc_by(cnt);
|
||||
cost += output.meta.cost;
|
||||
}
|
||||
|
||||
|
||||
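The change above turns PROM_STORE_REMOTE_WRITE_SAMPLES from a plain IntCounter into an IntCounterVec keyed by database. A minimal sketch of declaring and bumping such a labeled counter with the prometheus crate; the metric name and helper function here are illustrative only.

use lazy_static::lazy_static;
use prometheus::{register_int_counter_vec, IntCounterVec};

lazy_static! {
    // Illustrative metric; the real name and DB label constant live in `crate::metrics`.
    static ref REMOTE_WRITE_SAMPLES: IntCounterVec = register_int_counter_vec!(
        "example_prometheus_remote_write_samples",
        "samples written via Prometheus remote write, per database",
        &["db"]
    )
    .unwrap();
}

fn record_samples(db: &str, samples: u64) {
    // Each database gets its own time series for the counter.
    REMOTE_WRITE_SAMPLES.with_label_values(&[db]).inc_by(samples);
}

fn main() {
    record_samples("public", 42);
}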
@@ -23,10 +23,10 @@ use common_error::ext::ErrorExt;
|
||||
use common_query::Output;
|
||||
use datafusion_expr::LogicalPlan;
|
||||
use log_query::LogQuery;
|
||||
use pipeline::Value;
|
||||
use query::parser::PromQuery;
|
||||
use session::context::QueryContextRef;
|
||||
use sql::statements::statement::Statement;
|
||||
use vrl::value::Value;
|
||||
|
||||
/// SqlQueryInterceptor can track life cycle of a sql query and customize or
|
||||
/// abort its execution at given point.
|
||||
|
||||
@@ -26,6 +26,7 @@ use prometheus::{
|
||||
register_histogram, register_histogram_vec, register_int_counter, register_int_counter_vec,
|
||||
register_int_gauge, Histogram, HistogramVec, IntCounter, IntCounterVec, IntGauge,
|
||||
};
|
||||
use session::context::QueryContext;
|
||||
use tonic::body::BoxBody;
|
||||
use tower::{Layer, Service};
|
||||
|
||||
@@ -48,6 +49,13 @@ pub(crate) const METRIC_SUCCESS_VALUE: &str = "success";
|
||||
pub(crate) const METRIC_FAILURE_VALUE: &str = "failure";
|
||||
|
||||
lazy_static! {
|
||||
|
||||
pub static ref HTTP_REQUEST_COUNTER: IntCounterVec = register_int_counter_vec!(
|
||||
"greptime_servers_http_request_counter",
|
||||
"servers http request counter",
|
||||
&[METRIC_METHOD_LABEL, METRIC_PATH_LABEL, METRIC_CODE_LABEL, METRIC_DB_LABEL]
|
||||
).unwrap();
|
||||
|
||||
pub static ref METRIC_ERROR_COUNTER: IntCounterVec = register_int_counter_vec!(
|
||||
"greptime_servers_error",
|
||||
"servers error",
|
||||
@@ -114,9 +122,10 @@ lazy_static! {
|
||||
pub static ref METRIC_HTTP_PROM_STORE_CONVERT_ELAPSED: Histogram = METRIC_HTTP_PROM_STORE_CODEC_ELAPSED
|
||||
.with_label_values(&["convert"]);
|
||||
/// The samples count of Prometheus remote write.
|
||||
pub static ref PROM_STORE_REMOTE_WRITE_SAMPLES: IntCounter = register_int_counter!(
|
||||
pub static ref PROM_STORE_REMOTE_WRITE_SAMPLES: IntCounterVec = register_int_counter_vec!(
|
||||
"greptime_servers_prometheus_remote_write_samples",
|
||||
"frontend prometheus remote write samples"
|
||||
"frontend prometheus remote write samples",
|
||||
&[METRIC_DB_LABEL]
|
||||
)
|
||||
.unwrap();
|
||||
/// Http prometheus read duration per database.
|
||||
@@ -167,6 +176,8 @@ lazy_static! {
|
||||
&[METRIC_DB_LABEL, METRIC_RESULT_LABEL]
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
/// Count of logs ingested into Loki.
|
||||
pub static ref METRIC_LOKI_LOGS_INGESTION_COUNTER: IntCounterVec = register_int_counter_vec!(
|
||||
"greptime_servers_loki_logs_ingestion_counter",
|
||||
"servers loki logs ingestion counter",
|
||||
@@ -187,9 +198,11 @@ lazy_static! {
|
||||
&[METRIC_DB_LABEL]
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
/// Count of documents ingested into Elasticsearch logs.
|
||||
pub static ref METRIC_ELASTICSEARCH_LOGS_DOCS_COUNT: IntCounterVec = register_int_counter_vec!(
|
||||
"greptime_servers_elasticsearch_logs_docs_count",
|
||||
"servers elasticsearch logs docs count",
|
||||
"servers elasticsearch ingest logs docs count",
|
||||
&[METRIC_DB_LABEL]
|
||||
)
|
||||
.unwrap();
|
||||
@@ -252,13 +265,13 @@ lazy_static! {
|
||||
pub static ref METRIC_HTTP_REQUESTS_TOTAL: IntCounterVec = register_int_counter_vec!(
|
||||
"greptime_servers_http_requests_total",
|
||||
"servers http requests total",
|
||||
&[METRIC_METHOD_LABEL, METRIC_PATH_LABEL, METRIC_CODE_LABEL]
|
||||
&[METRIC_METHOD_LABEL, METRIC_PATH_LABEL, METRIC_CODE_LABEL, METRIC_DB_LABEL]
|
||||
)
|
||||
.unwrap();
|
||||
pub static ref METRIC_HTTP_REQUESTS_ELAPSED: HistogramVec = register_histogram_vec!(
|
||||
"greptime_servers_http_requests_elapsed",
|
||||
"servers http requests elapsed",
|
||||
&[METRIC_METHOD_LABEL, METRIC_PATH_LABEL, METRIC_CODE_LABEL],
|
||||
&[METRIC_METHOD_LABEL, METRIC_PATH_LABEL, METRIC_CODE_LABEL, METRIC_DB_LABEL],
|
||||
vec![0.005, 0.01, 0.05, 0.1, 0.5, 1.0, 5.0, 10.0, 60.0, 300.0]
|
||||
)
|
||||
.unwrap();
|
||||
@@ -352,19 +365,26 @@ where
|
||||
pub(crate) async fn http_metrics_layer(req: Request, next: Next) -> impl IntoResponse {
|
||||
let start = Instant::now();
|
||||
let path = if let Some(matched_path) = req.extensions().get::<MatchedPath>() {
|
||||
matched_path.as_str().to_owned()
|
||||
matched_path.as_str().to_string()
|
||||
} else {
|
||||
req.uri().path().to_owned()
|
||||
req.uri().path().to_string()
|
||||
};
|
||||
let method = req.method().clone();
|
||||
|
||||
let db = req
|
||||
.extensions()
|
||||
.get::<QueryContext>()
|
||||
.map(|ctx| ctx.get_db_string())
|
||||
.unwrap_or_else(|| "unknown".to_string());
|
||||
|
||||
let response = next.run(req).await;
|
||||
|
||||
let latency = start.elapsed().as_secs_f64();
|
||||
let status = response.status().as_u16().to_string();
|
||||
let method_str = method.to_string();
|
||||
let status = response.status();
|
||||
let status = status.as_str();
|
||||
let method_str = method.as_str();
|
||||
|
||||
let labels = [method_str.as_str(), path.as_str(), status.as_str()];
|
||||
let labels = [method_str, &path, status, db.as_str()];
|
||||
METRIC_HTTP_REQUESTS_TOTAL.with_label_values(&labels).inc();
|
||||
METRIC_HTTP_REQUESTS_ELAPSED
|
||||
.with_label_values(&labels)
|
||||
|
||||
@@ -185,6 +185,9 @@ fn select_variable(query: &str, query_context: QueryContextRef) -> Option<Output
|
||||
let value = match var_as[0] {
|
||||
"session.time_zone" | "time_zone" => query_context.timezone().to_string(),
|
||||
"system_time_zone" => system_timezone_name(),
|
||||
"max_execution_time" | "session.max_execution_time" => {
|
||||
query_context.query_timeout_as_millis().to_string()
|
||||
}
|
||||
_ => VAR_VALUES
|
||||
.get(var_as[0])
|
||||
.map(|v| v.to_string())
|
||||
|
||||
@@ -12,7 +12,7 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::collections::HashMap as StdHashMap;
|
||||
use std::collections::{BTreeMap, HashMap as StdHashMap};
|
||||
|
||||
use api::v1::column_data_type_extension::TypeExt;
|
||||
use api::v1::value::ValueData;
|
||||
@@ -20,6 +20,7 @@ use api::v1::{
|
||||
ColumnDataType, ColumnDataTypeExtension, ColumnOptions, ColumnSchema, JsonTypeExtension, Row,
|
||||
RowInsertRequest, Rows, SemanticType, Value as GreptimeValue,
|
||||
};
|
||||
use bytes::Bytes;
|
||||
use jsonb::{Number as JsonbNumber, Value as JsonbValue};
|
||||
use opentelemetry_proto::tonic::collector::logs::v1::ExportLogsServiceRequest;
|
||||
use opentelemetry_proto::tonic::common::v1::{any_value, AnyValue, InstrumentationScope, KeyValue};
|
||||
@@ -27,13 +28,13 @@ use opentelemetry_proto::tonic::logs::v1::{LogRecord, ResourceLogs, ScopeLogs};
|
||||
use pipeline::{
|
||||
ContextReq, GreptimePipelineParams, PipelineContext, PipelineWay, SchemaInfo, SelectInfo,
|
||||
};
|
||||
use serde_json::{Map, Value};
|
||||
use session::context::QueryContextRef;
|
||||
use snafu::{ensure, ResultExt};
|
||||
use snafu::ensure;
|
||||
use vrl::prelude::NotNan;
|
||||
use vrl::value::{KeyString, Value as VrlValue};
|
||||
|
||||
use crate::error::{
|
||||
IncompatibleSchemaSnafu, NotSupportedSnafu, PipelineSnafu, Result,
|
||||
UnsupportedJsonDataTypeForTagSnafu,
|
||||
IncompatibleSchemaSnafu, NotSupportedSnafu, Result, UnsupportedJsonDataTypeForTagSnafu,
|
||||
};
|
||||
use crate::http::event::PipelineIngestRequest;
|
||||
use crate::otlp::trace::attributes::OtlpAnyValue;
|
||||
@@ -69,8 +70,7 @@ pub async fn to_grpc_insert_requests(
|
||||
Ok(ContextReq::default_opt_with_reqs(vec![insert_request]))
|
||||
}
|
||||
PipelineWay::Pipeline(pipeline_def) => {
|
||||
let data = parse_export_logs_service_request(request);
|
||||
let array = pipeline::json_array_to_map(data).context(PipelineSnafu)?;
|
||||
let array = parse_export_logs_service_request(request);
|
||||
|
||||
let pipeline_ctx =
|
||||
PipelineContext::new(&pipeline_def, &pipeline_params, query_ctx.channel());
|
||||
@@ -93,16 +93,16 @@ pub async fn to_grpc_insert_requests(
|
||||
}
|
||||
}
|
||||
|
||||
fn scope_to_pipeline_value(scope: Option<InstrumentationScope>) -> (Value, Value, Value) {
|
||||
fn scope_to_pipeline_value(scope: Option<InstrumentationScope>) -> (VrlValue, VrlValue, VrlValue) {
|
||||
scope
|
||||
.map(|x| {
|
||||
(
|
||||
Value::Object(key_value_to_map(x.attributes)),
|
||||
Value::String(x.version),
|
||||
Value::String(x.name),
|
||||
VrlValue::Object(key_value_to_map(x.attributes)),
|
||||
VrlValue::Bytes(x.version.into()),
|
||||
VrlValue::Bytes(x.name.into()),
|
||||
)
|
||||
})
|
||||
.unwrap_or((Value::Null, Value::Null, Value::Null))
|
||||
.unwrap_or((VrlValue::Null, VrlValue::Null, VrlValue::Null))
|
||||
}
|
||||
|
||||
fn scope_to_jsonb(
|
||||
@@ -121,53 +121,59 @@ fn scope_to_jsonb(
|
||||
|
||||
fn log_to_pipeline_value(
|
||||
log: LogRecord,
|
||||
resource_schema_url: Value,
|
||||
resource_attr: Value,
|
||||
scope_schema_url: Value,
|
||||
scope_name: Value,
|
||||
scope_version: Value,
|
||||
scope_attrs: Value,
|
||||
) -> Value {
|
||||
let log_attrs = Value::Object(key_value_to_map(log.attributes));
|
||||
let mut map = Map::new();
|
||||
map.insert("Timestamp".to_string(), Value::from(log.time_unix_nano));
|
||||
resource_schema_url: VrlValue,
|
||||
resource_attr: VrlValue,
|
||||
scope_schema_url: VrlValue,
|
||||
scope_name: VrlValue,
|
||||
scope_version: VrlValue,
|
||||
scope_attrs: VrlValue,
|
||||
) -> VrlValue {
|
||||
let log_attrs = VrlValue::Object(key_value_to_map(log.attributes));
|
||||
let mut map = BTreeMap::new();
|
||||
map.insert(
|
||||
"ObservedTimestamp".to_string(),
|
||||
Value::from(log.observed_time_unix_nano),
|
||||
"Timestamp".into(),
|
||||
VrlValue::Integer(log.time_unix_nano as i64),
|
||||
);
|
||||
map.insert(
|
||||
"ObservedTimestamp".into(),
|
||||
VrlValue::Integer(log.observed_time_unix_nano as i64),
|
||||
);
|
||||
|
||||
// needs to be converted to a string
|
||||
map.insert(
|
||||
"TraceId".to_string(),
|
||||
Value::String(bytes_to_hex_string(&log.trace_id)),
|
||||
"TraceId".into(),
|
||||
VrlValue::Bytes(bytes_to_hex_string(&log.trace_id).into()),
|
||||
);
|
||||
map.insert(
|
||||
"SpanId".to_string(),
|
||||
Value::String(bytes_to_hex_string(&log.span_id)),
|
||||
"SpanId".into(),
|
||||
VrlValue::Bytes(bytes_to_hex_string(&log.span_id).into()),
|
||||
);
|
||||
map.insert("TraceFlags".to_string(), Value::from(log.flags));
|
||||
map.insert("SeverityText".to_string(), Value::String(log.severity_text));
|
||||
map.insert("TraceFlags".into(), VrlValue::Integer(log.flags as i64));
|
||||
map.insert(
|
||||
"SeverityNumber".to_string(),
|
||||
Value::from(log.severity_number),
|
||||
"SeverityText".into(),
|
||||
VrlValue::Bytes(log.severity_text.into()),
|
||||
);
|
||||
map.insert(
|
||||
"SeverityNumber".into(),
|
||||
VrlValue::Integer(log.severity_number as i64),
|
||||
);
|
||||
// needs to be converted to a string
|
||||
map.insert(
|
||||
"Body".to_string(),
|
||||
"Body".into(),
|
||||
log.body
|
||||
.as_ref()
|
||||
.map(|x| Value::String(log_body_to_string(x)))
|
||||
.unwrap_or(Value::Null),
|
||||
.map(|x| VrlValue::Bytes(log_body_to_string(x).into()))
|
||||
.unwrap_or(VrlValue::Null),
|
||||
);
|
||||
map.insert("ResourceSchemaUrl".to_string(), resource_schema_url);
|
||||
map.insert("ResourceSchemaUrl".into(), resource_schema_url);
|
||||
|
||||
map.insert("ResourceAttributes".to_string(), resource_attr);
|
||||
map.insert("ScopeSchemaUrl".to_string(), scope_schema_url);
|
||||
map.insert("ScopeName".to_string(), scope_name);
|
||||
map.insert("ScopeVersion".to_string(), scope_version);
|
||||
map.insert("ScopeAttributes".to_string(), scope_attrs);
|
||||
map.insert("LogAttributes".to_string(), log_attrs);
|
||||
Value::Object(map)
|
||||
map.insert("ResourceAttributes".into(), resource_attr);
|
||||
map.insert("ScopeSchemaUrl".into(), scope_schema_url);
|
||||
map.insert("ScopeName".into(), scope_name);
|
||||
map.insert("ScopeVersion".into(), scope_version);
|
||||
map.insert("ScopeAttributes".into(), scope_attrs);
|
||||
map.insert("LogAttributes".into(), log_attrs);
|
||||
VrlValue::Object(map)
|
||||
}
|
||||
|
||||
fn build_otlp_logs_identity_schema() -> Vec<ColumnSchema> {
|
||||
@@ -622,18 +628,18 @@ fn merge_values(
|
||||
|
||||
/// transform otlp logs request to pipeline value
|
||||
/// https://opentelemetry.io/docs/concepts/signals/logs/
|
||||
fn parse_export_logs_service_request(request: ExportLogsServiceRequest) -> Vec<Value> {
|
||||
fn parse_export_logs_service_request(request: ExportLogsServiceRequest) -> Vec<VrlValue> {
|
||||
let mut result = Vec::new();
|
||||
for r in request.resource_logs {
|
||||
let resource_attr = r
|
||||
.resource
|
||||
.map(|x| Value::Object(key_value_to_map(x.attributes)))
|
||||
.unwrap_or(Value::Null);
|
||||
let resource_schema_url = Value::String(r.schema_url);
|
||||
.map(|x| VrlValue::Object(key_value_to_map(x.attributes)))
|
||||
.unwrap_or(VrlValue::Null);
|
||||
let resource_schema_url = VrlValue::Bytes(r.schema_url.into());
|
||||
for scope_logs in r.scope_logs {
|
||||
let (scope_attrs, scope_version, scope_name) =
|
||||
scope_to_pipeline_value(scope_logs.scope);
|
||||
let scope_schema_url = Value::String(scope_logs.schema_url);
|
||||
let scope_schema_url = VrlValue::Bytes(scope_logs.schema_url.into());
|
||||
for log in scope_logs.log_records {
|
||||
let value = log_to_pipeline_value(
|
||||
log,
|
||||
@@ -652,43 +658,39 @@ fn parse_export_logs_service_request(request: ExportLogsServiceRequest) -> Vec<V
|
||||
}
|
||||
|
||||
// convert AnyValue to pipeline value
|
||||
fn any_value_to_pipeline_value(value: any_value::Value) -> Value {
|
||||
fn any_value_to_vrl_value(value: any_value::Value) -> VrlValue {
|
||||
match value {
|
||||
any_value::Value::StringValue(s) => Value::String(s),
|
||||
any_value::Value::IntValue(i) => Value::from(i),
|
||||
any_value::Value::DoubleValue(d) => Value::from(d),
|
||||
any_value::Value::BoolValue(b) => Value::Bool(b),
|
||||
any_value::Value::ArrayValue(a) => {
|
||||
let values = a
|
||||
any_value::Value::StringValue(s) => VrlValue::Bytes(s.into()),
|
||||
any_value::Value::IntValue(i) => VrlValue::Integer(i),
|
||||
any_value::Value::DoubleValue(d) => VrlValue::Float(NotNan::new(d).unwrap()),
|
||||
any_value::Value::BoolValue(b) => VrlValue::Boolean(b),
|
||||
any_value::Value::ArrayValue(array_value) => {
|
||||
let values = array_value
|
||||
.values
|
||||
.into_iter()
|
||||
.map(|v| match v.value {
|
||||
Some(value) => any_value_to_pipeline_value(value),
|
||||
None => Value::Null,
|
||||
})
|
||||
.filter_map(|v| v.value.map(any_value_to_vrl_value))
|
||||
.collect();
|
||||
Value::Array(values)
|
||||
VrlValue::Array(values)
|
||||
}
|
||||
any_value::Value::KvlistValue(kv) => {
|
||||
let value = key_value_to_map(kv.values);
|
||||
Value::Object(value)
|
||||
any_value::Value::KvlistValue(key_value_list) => {
|
||||
VrlValue::Object(key_value_to_map(key_value_list.values))
|
||||
}
|
||||
any_value::Value::BytesValue(b) => Value::String(bytes_to_hex_string(&b)),
|
||||
any_value::Value::BytesValue(items) => VrlValue::Bytes(Bytes::from(items)),
|
||||
}
|
||||
}
|
||||
|
||||
// convert otlp keyValue vec to map
|
||||
fn key_value_to_map(key_values: Vec<KeyValue>) -> Map<String, Value> {
|
||||
let mut map = Map::new();
|
||||
fn key_value_to_map(key_values: Vec<KeyValue>) -> BTreeMap<KeyString, VrlValue> {
|
||||
let mut map = BTreeMap::new();
|
||||
for kv in key_values {
|
||||
let value = match kv.value {
|
||||
Some(value) => match value.value {
|
||||
Some(value) => any_value_to_pipeline_value(value),
|
||||
None => Value::Null,
|
||||
Some(value) => any_value_to_vrl_value(value),
|
||||
None => VrlValue::Null,
|
||||
},
|
||||
None => Value::Null,
|
||||
None => VrlValue::Null,
|
||||
};
|
||||
map.insert(kv.key.clone(), value);
|
||||
map.insert(kv.key.into(), value);
|
||||
}
|
||||
map
|
||||
}
|
||||
|
||||
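A small hedged example of the conversion above: an OTLP string attribute becomes a VRL bytes value. It uses the opentelemetry-proto enum named in the imports; the inline match is a condensed stand-in for the full `any_value_to_vrl_value` shown in the diff.

use opentelemetry_proto::tonic::common::v1::any_value;
use vrl::value::Value as VrlValue;

fn main() {
    let v = any_value::Value::StringValue("hello".to_string());
    // Strings map to VrlValue::Bytes, matching the conversion above.
    let vrl = match v {
        any_value::Value::StringValue(s) => VrlValue::Bytes(s.into()),
        _ => VrlValue::Null,
    };
    assert_eq!(vrl, VrlValue::Bytes("hello".into()));
}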
@@ -20,12 +20,13 @@ use api::greptime_proto;
|
||||
use api::v1::{ColumnDataType, ColumnSchema, RowInsertRequest, Rows, SemanticType};
|
||||
use common_time::timestamp::TimeUnit;
|
||||
use pipeline::{
|
||||
unwrap_or_continue_if_err, ContextReq, DispatchedTo, Pipeline, PipelineContext,
|
||||
PipelineDefinition, PipelineExecOutput, SchemaInfo, TransformedOutput, TransformerMode, Value,
|
||||
GREPTIME_INTERNAL_IDENTITY_PIPELINE_NAME,
|
||||
identity_pipeline, unwrap_or_continue_if_err, ContextReq, DispatchedTo, Pipeline,
|
||||
PipelineContext, PipelineDefinition, PipelineExecOutput, SchemaInfo, TransformedOutput,
|
||||
TransformerMode, GREPTIME_INTERNAL_IDENTITY_PIPELINE_NAME,
|
||||
};
|
||||
use session::context::{Channel, QueryContextRef};
|
||||
use snafu::ResultExt;
|
||||
use vrl::value::Value as VrlValue;
|
||||
|
||||
use crate::error::{CatalogSnafu, PipelineSnafu, Result};
|
||||
use crate::http::event::PipelineIngestRequest;
|
||||
@@ -93,7 +94,7 @@ async fn run_identity_pipeline(
|
||||
.await
|
||||
.context(CatalogSnafu)?
|
||||
};
|
||||
pipeline::identity_pipeline(data_array, table, pipeline_ctx)
|
||||
identity_pipeline(data_array, table, pipeline_ctx)
|
||||
.map(|opt_map| ContextReq::from_opt_map(opt_map, table_name))
|
||||
.context(PipelineSnafu)
|
||||
}
|
||||
@@ -117,7 +118,7 @@ async fn run_custom_pipeline(
|
||||
} = pipeline_req;
|
||||
let arr_len = pipeline_maps.len();
|
||||
let mut transformed_map = HashMap::new();
|
||||
let mut dispatched: BTreeMap<DispatchedTo, Vec<Value>> = BTreeMap::new();
|
||||
let mut dispatched: BTreeMap<DispatchedTo, Vec<VrlValue>> = BTreeMap::new();
|
||||
|
||||
let mut schema_info = match pipeline.transformer() {
|
||||
TransformerMode::GreptimeTransformer(greptime_transformer) => {
|
||||
|
||||
@@ -20,12 +20,15 @@ use std::slice;
|
||||
use api::prom_store::remote::Sample;
|
||||
use bytes::{Buf, Bytes};
|
||||
use common_query::prelude::{GREPTIME_TIMESTAMP, GREPTIME_VALUE};
|
||||
use pipeline::{ContextReq, GreptimePipelineParams, PipelineContext, PipelineDefinition, Value};
|
||||
use common_telemetry::warn;
|
||||
use pipeline::{ContextReq, GreptimePipelineParams, PipelineContext, PipelineDefinition};
|
||||
use prost::encoding::message::merge;
|
||||
use prost::encoding::{decode_key, decode_varint, WireType};
|
||||
use prost::DecodeError;
|
||||
use session::context::QueryContextRef;
|
||||
use snafu::OptionExt;
|
||||
use vrl::prelude::NotNan;
|
||||
use vrl::value::{KeyString, Value as VrlValue};
|
||||
|
||||
use crate::error::InternalSnafu;
|
||||
use crate::http::event::PipelineIngestRequest;
|
||||
@@ -342,7 +345,7 @@ impl PromWriteRequest {
|
||||
/// let's keep it that way for now.
|
||||
pub struct PromSeriesProcessor {
|
||||
pub(crate) use_pipeline: bool,
|
||||
pub(crate) table_values: BTreeMap<String, Vec<Value>>,
|
||||
pub(crate) table_values: BTreeMap<String, Vec<VrlValue>>,
|
||||
|
||||
// optional fields for pipeline
|
||||
pub(crate) pipeline_handler: Option<PipelineHandlerRef>,
|
||||
@@ -379,29 +382,33 @@ impl PromSeriesProcessor {
|
||||
series: &mut PromTimeSeries,
|
||||
prom_validation_mode: PromValidationMode,
|
||||
) -> Result<(), DecodeError> {
|
||||
let mut vec_pipeline_map: Vec<Value> = Vec::new();
|
||||
let mut vec_pipeline_map = Vec::new();
|
||||
let mut pipeline_map = BTreeMap::new();
|
||||
for l in series.labels.iter() {
|
||||
let name = prom_validation_mode.decode_string(&l.name)?;
|
||||
let value = prom_validation_mode.decode_string(&l.value)?;
|
||||
pipeline_map.insert(name, Value::String(value));
|
||||
pipeline_map.insert(KeyString::from(name), VrlValue::Bytes(value.into()));
|
||||
}
|
||||
|
||||
let one_sample = series.samples.len() == 1;
|
||||
|
||||
for s in series.samples.iter() {
|
||||
// skip NaN value
|
||||
if s.value.is_nan() {
|
||||
let Ok(value) = NotNan::new(s.value) else {
|
||||
warn!("Invalid float value: {}", s.value);
|
||||
continue;
|
||||
}
|
||||
};
|
||||
|
||||
let timestamp = s.timestamp;
|
||||
pipeline_map.insert(GREPTIME_TIMESTAMP.to_string(), Value::Int64(timestamp));
|
||||
pipeline_map.insert(GREPTIME_VALUE.to_string(), Value::Float64(s.value));
|
||||
pipeline_map.insert(
|
||||
KeyString::from(GREPTIME_TIMESTAMP),
|
||||
VrlValue::Integer(timestamp),
|
||||
);
|
||||
pipeline_map.insert(KeyString::from(GREPTIME_VALUE), VrlValue::Float(value));
|
||||
if one_sample {
|
||||
vec_pipeline_map.push(Value::Map(pipeline_map.into()));
|
||||
vec_pipeline_map.push(VrlValue::Object(pipeline_map));
|
||||
break;
|
||||
} else {
|
||||
vec_pipeline_map.push(Value::Map(pipeline_map.clone().into()));
|
||||
vec_pipeline_map.push(VrlValue::Object(pipeline_map.clone()));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
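The old loop skipped NaN samples with `s.value.is_nan()`; the new code goes through `NotNan::new`, which rejects NaN at construction and yields the non-NaN float that `VrlValue::Float` expects. A tiny sketch, assuming `vrl::prelude::NotNan` is the ordered-float `NotNan` the imports above suggest.

use vrl::prelude::NotNan;

fn main() {
    // NotNan::new returns Err for NaN, so `let Ok(value) = NotNan::new(s.value) else { continue }`
    // both filters NaN samples and produces the NotNan<f64> wrapper.
    assert!(NotNan::new(f64::NAN).is_err());
    assert_eq!(NotNan::new(1.5).unwrap().into_inner(), 1.5);
}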
@@ -95,6 +95,18 @@ pub enum Error {
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display(
|
||||
"Not allowed to remove partition column {} from table {}",
|
||||
column_name,
|
||||
table_name
|
||||
))]
|
||||
RemovePartitionColumn {
|
||||
column_name: String,
|
||||
table_name: String,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display(
|
||||
"Failed to build column descriptor for table: {}, column: {}",
|
||||
table_name,
|
||||
@@ -193,6 +205,7 @@ impl ErrorExt for Error {
|
||||
StatusCode::EngineExecuteQuery
|
||||
}
|
||||
Error::RemoveColumnInIndex { .. }
|
||||
| Error::RemovePartitionColumn { .. }
|
||||
| Error::BuildColumnDescriptor { .. }
|
||||
| Error::InvalidAlterRequest { .. } => StatusCode::InvalidArguments,
|
||||
Error::CastDefaultValue { source, .. } => source.status_code(),
|
||||
|
||||
@@ -649,10 +649,19 @@ impl TableMeta {
|
||||
msg: format!("Table {table_name} cannot add new columns {column_names:?}"),
|
||||
})?;
|
||||
|
||||
let partition_key_indices = self
|
||||
.partition_key_indices
|
||||
.iter()
|
||||
.map(|idx| table_schema.column_name_by_index(*idx))
|
||||
// This unwrap is safe since we only add new columns.
|
||||
.map(|name| new_schema.column_index_by_name(name).unwrap())
|
||||
.collect();
|
||||
|
||||
// value_indices would be generated automatically.
|
||||
let _ = meta_builder
|
||||
.schema(Arc::new(new_schema))
|
||||
.primary_key_indices(primary_key_indices);
|
||||
.primary_key_indices(primary_key_indices)
|
||||
.partition_key_indices(partition_key_indices);
|
||||
|
||||
Ok(meta_builder)
|
||||
}
|
||||
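The remapping above carries partition key columns over by name: each old index is resolved to a column name via the old schema, then looked up again in the new schema, so columns inserted before a partition column shift its index correctly. A simplified sketch with string slices standing in for the real schema types; the function name is hypothetical.

// Illustrative only: plain string slices stand in for the schema structs.
fn remap_indices(old: &[&str], new: &[&str], old_indices: &[usize]) -> Vec<usize> {
    old_indices
        .iter()
        .map(|&i| old[i])
        // Safe to expect here because added columns never remove existing ones.
        .map(|name| new.iter().position(|c| *c == name).expect("column kept"))
        .collect()
}

fn main() {
    let old = ["col1", "ts", "col2"];
    let new = ["my_tag_first", "col1", "ts", "my_field_after_ts", "col2"];
    // Partition columns col1 (index 0) and col2 (index 2) move to indices 1 and 4.
    assert_eq!(remap_indices(&old, &new, &[0, 2]), vec![1, 4]);
}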
@@ -680,6 +689,14 @@ impl TableMeta {
|
||||
}
|
||||
);
|
||||
|
||||
ensure!(
|
||||
!self.partition_key_indices.contains(&index),
|
||||
error::RemovePartitionColumnSnafu {
|
||||
column_name: *column_name,
|
||||
table_name,
|
||||
}
|
||||
);
|
||||
|
||||
if let Some(ts_index) = timestamp_index {
|
||||
// Not allowed to remove column in timestamp index.
|
||||
ensure!(
|
||||
@@ -729,9 +746,18 @@ impl TableMeta {
|
||||
.map(|name| new_schema.column_index_by_name(name).unwrap())
|
||||
.collect();
|
||||
|
||||
let partition_key_indices = self
|
||||
.partition_key_indices
|
||||
.iter()
|
||||
.map(|idx| table_schema.column_name_by_index(*idx))
|
||||
// This unwrap is safe since we don't allow removing a partition key column.
|
||||
.map(|name| new_schema.column_index_by_name(name).unwrap())
|
||||
.collect();
|
||||
|
||||
let _ = meta_builder
|
||||
.schema(Arc::new(new_schema))
|
||||
.primary_key_indices(primary_key_indices);
|
||||
.primary_key_indices(primary_key_indices)
|
||||
.partition_key_indices(partition_key_indices);
|
||||
|
||||
Ok(meta_builder)
|
||||
}
|
||||
@@ -1334,6 +1360,8 @@ fn unset_column_skipping_index_options(
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::assert_matches::assert_matches;
|
||||
|
||||
use common_error::ext::ErrorExt;
|
||||
use common_error::status_code::StatusCode;
|
||||
use datatypes::data_type::ConcreteDataType;
|
||||
@@ -1342,6 +1370,7 @@ mod tests {
|
||||
};
|
||||
|
||||
use super::*;
|
||||
use crate::Error;
|
||||
|
||||
/// Create a test schema with 3 columns: `[col1 int32, ts timestampmills, col2 int32]`.
|
||||
fn new_test_schema() -> Schema {
|
||||
@@ -1419,6 +1448,11 @@ mod tests {
|
||||
ConcreteDataType::string_datatype(),
|
||||
true,
|
||||
);
|
||||
let yet_another_field = ColumnSchema::new(
|
||||
"yet_another_field_after_ts",
|
||||
ConcreteDataType::int64_datatype(),
|
||||
true,
|
||||
);
|
||||
let alter_kind = AlterKind::AddColumns {
|
||||
columns: vec![
|
||||
AddColumnRequest {
|
||||
@@ -1435,6 +1469,14 @@ mod tests {
|
||||
}),
|
||||
add_if_not_exists: false,
|
||||
},
|
||||
AddColumnRequest {
|
||||
column_schema: yet_another_field,
|
||||
is_key: true,
|
||||
location: Some(AddColumnLocation::After {
|
||||
column_name: "ts".to_string(),
|
||||
}),
|
||||
add_if_not_exists: false,
|
||||
},
|
||||
],
|
||||
};
|
||||
|
||||
@@ -1790,6 +1832,29 @@ mod tests {
|
||||
assert_eq!(StatusCode::InvalidArguments, err.status_code());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_remove_partition_column() {
|
||||
let schema = Arc::new(new_test_schema());
|
||||
let meta = TableMetaBuilder::empty()
|
||||
.schema(schema)
|
||||
.primary_key_indices(vec![])
|
||||
.partition_key_indices(vec![0])
|
||||
.engine("engine")
|
||||
.next_column_id(3)
|
||||
.build()
|
||||
.unwrap();
|
||||
// Try to remove a partition key column.
|
||||
let alter_kind = AlterKind::DropColumns {
|
||||
names: vec![String::from("col1")],
|
||||
};
|
||||
|
||||
let err = meta
|
||||
.builder_with_alter_kind("my_table", &alter_kind)
|
||||
.err()
|
||||
.unwrap();
|
||||
assert_matches!(err, Error::RemovePartitionColumn { .. });
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_change_key_column_data_type() {
|
||||
let schema = Arc::new(new_test_schema());
|
||||
@@ -1855,6 +1920,8 @@ mod tests {
|
||||
let meta = TableMetaBuilder::empty()
|
||||
.schema(schema)
|
||||
.primary_key_indices(vec![0])
|
||||
// partition col: col1, col2
|
||||
.partition_key_indices(vec![0, 2])
|
||||
.engine("engine")
|
||||
.next_column_id(3)
|
||||
.build()
|
||||
@@ -1870,11 +1937,19 @@ mod tests {
|
||||
.map(|column_schema| column_schema.name.clone())
|
||||
.collect();
|
||||
assert_eq!(
|
||||
&["my_tag_first", "col1", "ts", "my_field_after_ts", "col2"],
|
||||
&[
|
||||
"my_tag_first", // primary key column
|
||||
"col1", // partition column
|
||||
"ts", // timestamp column
|
||||
"yet_another_field_after_ts", // primary key column
|
||||
"my_field_after_ts", // value column
|
||||
"col2", // partition column
|
||||
],
|
||||
&names[..]
|
||||
);
|
||||
assert_eq!(&[0, 1], &new_meta.primary_key_indices[..]);
|
||||
assert_eq!(&[2, 3, 4], &new_meta.value_indices[..]);
|
||||
assert_eq!(&[0, 1, 3], &new_meta.primary_key_indices[..]);
|
||||
assert_eq!(&[2, 4, 5], &new_meta.value_indices[..]);
|
||||
assert_eq!(&[1, 5], &new_meta.partition_key_indices[..]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
||||
@@ -126,7 +126,7 @@ impl PartialEq<Column> for ColumnEntry {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
//TODO: Checks `semantic_type`
|
||||
// TODO: Checks `semantic_type`
|
||||
match semantic_type(&self.semantic_type) {
|
||||
Some(SemanticType::Tag) => {
|
||||
if !other
|
||||
|
||||