Use zstd to compress large allure artifacts (#5458)

## Problem

- Because we compress artifacts file by file, we don't need to put them
into `tar` containers (ie instead of `tar.gz` we can use just `gz`).
- Pythons gz single-threaded and pretty slow.

A benchmark has shown ~20 times speedup (19.876176291 vs
0.8748335830000009) on my laptop (for a pageserver.log size is 1.3M)

## Summary of changes
- Replace tarfile with zstandart
- Update allure to 2.24.0
This commit is contained in:
Alexander Bayandin
2023-10-04 16:20:16 +01:00
committed by GitHub
parent 25a37215f3
commit 7a2cafb34d
4 changed files with 79 additions and 13 deletions

View File

@@ -3,7 +3,6 @@ import json
import os
import re
import subprocess
import tarfile
import threading
import time
from pathlib import Path
@@ -11,6 +10,7 @@ from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Tuple, Ty
from urllib.parse import urlencode
import allure
import zstandard
from psycopg2.extensions import cursor
from fixtures.log_helper import log
@@ -231,19 +231,26 @@ def allure_attach_from_dir(dir: Path):
for attachment in Path(dir).glob("**/*"):
if ATTACHMENT_NAME_REGEX.fullmatch(attachment.name) and attachment.stat().st_size > 0:
source = str(attachment)
name = str(attachment.relative_to(dir))
# compress files larger than 1Mb, they're hardly readable in a browser
if attachment.stat().st_size > 1024 * 1024:
source = f"{attachment}.tar.gz"
with tarfile.open(source, "w:gz") as tar:
tar.add(attachment, arcname=attachment.name)
name = f"{name}.tar.gz"
# compress files that are larger than 1Mb, they're hardly readable in a browser
if attachment.stat().st_size > 1024**2:
compressed = attachment.with_suffix(".zst")
if source.endswith(".tar.gz"):
cctx = zstandard.ZstdCompressor()
with attachment.open("rb") as fin, compressed.open("wb") as fout:
cctx.copy_stream(fin, fout)
name = f"{name}.zst"
attachment = compressed
source = str(attachment)
if source.endswith(".gz"):
attachment_type = "application/gzip"
extension = "tar.gz"
extension = "gz"
elif source.endswith(".zst"):
attachment_type = "application/zstd"
extension = "zst"
elif source.endswith(".svg"):
attachment_type = "image/svg+xml"
extension = "svg"