Mirror of https://github.com/neondatabase/neon.git (synced 2025-12-23 06:09:59 +00:00)
## Problem

In the future, we want to compare code coverage on a PR with coverage on the main branch. Currently, we store only code coverage HTML reports; I suggest we start storing reports in the "lcov info" format as well, so that we can use/parse them later. Currently, the file size is ~7 MB (it's a text-based format and could be compressed into a ~400 KB archive).

- More about the "lcov info" format: https://manpages.ubuntu.com/manpages/jammy/man1/geninfo.1.html#files
- Part of https://github.com/neondatabase/neon/issues/3543

## Summary of changes

- Change `scripts/coverage` to output lcov coverage to the `report/lcov.info` file instead of stdout (we already upload the whole `report/` directory to S3)
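Not part of this change, but as an illustration of why the lcov tracefile is convenient to keep around: a future comparison job could reduce each `report/lcov.info` to per-file line-coverage numbers by reading its `SF:` / `DA:` / `end_of_record` records (see the geninfo man page linked above). The function name and file paths below are hypothetical.

```python
from pathlib import Path
from typing import Dict, Tuple


def line_coverage(tracefile: Path) -> Dict[str, Tuple[int, int]]:
    """Return {source_file: (lines_hit, lines_found)} for an lcov tracefile."""
    stats: Dict[str, Tuple[int, int]] = {}
    current, hit, found = None, 0, 0
    for record in tracefile.read_text().splitlines():
        if record.startswith('SF:'):       # start of a per-source-file section
            current, hit, found = record[3:], 0, 0
        elif record.startswith('DA:'):     # DA:<line>,<execution count>
            found += 1
            hit += int(record.split(',')[1]) > 0
        elif record == 'end_of_record' and current:
            stats[current] = (hit, found)
            current = None
    return stats


# e.g. compare a PR run against main (both downloaded from S3):
# pr, main = line_coverage(Path('pr/lcov.info')), line_coverage(Path('main/lcov.info'))
```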
597 lines · 20 KiB · Python · Executable File
#!/usr/bin/env python3

# Here's a good link in case you're interested in learning more
# about current deficiencies of the rust code coverage story:
# https://github.com/rust-lang/rust/issues?q=is%3Aissue+is%3Aopen+instrument-coverage+label%3AA-code-coverage
#
# Also a couple of inspirational tools which I deliberately ended up not using:
# * https://github.com/mozilla/grcov
# * https://github.com/taiki-e/cargo-llvm-cov
# * https://github.com/llvm/llvm-project/tree/main/llvm/test/tools/llvm-cov

import argparse
import hashlib
import json
import os
import shutil
import socket
import subprocess
import sys
from abc import ABC, abstractmethod
from dataclasses import dataclass
from pathlib import Path
from tempfile import TemporaryDirectory
from textwrap import dedent
from typing import Any, Dict, Iterable, Iterator, List, Optional


def file_mtime_or_zero(path: Path) -> int:
    try:
        return path.stat().st_mtime_ns
    except FileNotFoundError:
        return 0


def hash_strings(iterable: Iterable[str]) -> str:
    return hashlib.sha1(''.join(iterable).encode('utf-8')).hexdigest()


def intersperse(sep: Any, iterable: Iterable[Any]) -> Iterator[Any]:
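    # e.g. list(intersperse('-object', ['a', 'b', 'c'])) == ['a', '-object', 'b', '-object', 'c'];
    # `LLVM._cov` uses this to build llvm-cov's `BIN -object BIN ...` argument list.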
    fst = True
    for item in iterable:
        if not fst:
            yield sep
        fst = False
        yield item


def find_demangler(demangler: Optional[Path] = None) -> Path:
    known_tools = ['c++filt', 'rustfilt', 'llvm-cxxfilt']

    if demangler:
        # Explicit argument has precedence over `known_tools`
        demanglers = [demangler]
    else:
        demanglers = [Path(x) for x in known_tools]

    for exe in demanglers:
        if shutil.which(exe):
            return exe

    raise Exception(' '.join([
        'Failed to find symbol demangler.',
        'Please install it or provide another tool',
        f"(e.g. {', '.join(known_tools)})",
    ]))


class Cargo:
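    # Wraps the `cargo`/`rustc` invocations needed to locate the target dir,
    # the toolchain's LLVM tools and the workspace's test & binary executables.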
    def __init__(self, cwd: Path) -> None:
        self.cwd = cwd
        self.target_dir = Path(os.environ.get('CARGO_TARGET_DIR', cwd / 'target')).resolve()
        self._rustlib_dir: Optional[Path] = None

    @property
    def rustlib_dir(self) -> Path:
        if not self._rustlib_dir:
            cmd = [
                'rustc',
                '--print=target-libdir',
            ]
            self._rustlib_dir = Path(subprocess.check_output(cmd, cwd=self.cwd, text=True)).parent

        return self._rustlib_dir

    def binaries(self, profile: str) -> List[str]:
        executables = []

        # This will emit json messages containing test binary names
        cmd = [
            'cargo',
            'test',
            '--no-run',
            '--message-format=json',
        ]
        env = dict(os.environ, PROFILE=profile)
        output = subprocess.check_output(cmd, cwd=self.cwd, env=env, text=True)

        for line in output.splitlines(keepends=False):
            meta = json.loads(line)
            exe = meta.get('executable')
            if exe:
                executables.append(exe)

        # Metadata contains crate names, which can be used
        # to recover names of executables, e.g. `pageserver`
        cmd = [
            'cargo',
            'metadata',
            '--format-version=1',
            '--no-deps',
        ]
        meta = json.loads(subprocess.check_output(cmd, cwd=self.cwd))

        for pkg in meta.get('packages', []):
            for target in pkg.get('targets', []):
                if 'bin' in target['kind']:
                    exe = self.target_dir / profile / target['name']
                    if exe.exists():
                        executables.append(str(exe))

        return executables


@dataclass
class LLVM:
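    # Thin wrapper around the `llvm-profdata` and `llvm-cov` tools that ship
    # with the Rust toolchain (see `resolve_tool` for the lookup order).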
    cargo: Cargo

    def resolve_tool(self, name: str) -> str:
        exe = self.cargo.rustlib_dir / 'bin' / name
        if exe.exists():
            return str(exe)

        if not shutil.which(name):
            # Show a user-friendly warning
            raise Exception(' '.join([
                f"It appears that you don't have `{name}` installed.",
                "Please execute `rustup component add llvm-tools-preview`,",
                "or install it via your package manager of choice.",
                "LLVM tools should be the same version as LLVM in `rustc --version --verbose`.",
            ]))

        return name

    def profdata(self, input_files_list: Path, output_profdata: Path) -> None:
        subprocess.check_call([
            self.resolve_tool('llvm-profdata'),
            'merge',
            '-sparse',
            f'-input-files={input_files_list}',
            f'-output={output_profdata}',
        ])

    def _cov(self,
             *args,
             subcommand: str,
             profdata: Path,
             objects: List[str],
             sources: List[str],
             demangler: Optional[Path] = None,
             output_file: Optional[Path] = None,
             ) -> None:
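        # Build and run a single `llvm-cov <subcommand>` invocation; when
        # `output_file` is set, stdout is redirected into it (the lcov export
        # relies on this).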
        cwd = self.cargo.cwd
        objects = list(intersperse('-object', objects))
        extras = list(args)

        # For some reason `rustc` produces relative paths to src files,
        # so we force it to cut the $PWD prefix.
        # see: https://github.com/rust-lang/rust/issues/34701#issuecomment-739809584
        if sources:
            extras.append(f'-path-equivalence=.,{cwd.resolve()}')

        if demangler:
            extras.append(f'-Xdemangler={demangler}')

        cmd = [
            self.resolve_tool('llvm-cov'),
            subcommand,  # '-dump-collected-paths', a classified debug flag
            '-instr-profile',
            str(profdata),
            *extras,
            *objects,
            *sources,
        ]
        if output_file is not None:
            with output_file.open('w') as outfile:
                subprocess.check_call(cmd, cwd=cwd, stdout=outfile)
        else:
            subprocess.check_call(cmd, cwd=cwd)

    def cov_report(self, **kwargs) -> None:
        self._cov(subcommand='report', **kwargs)

    def cov_export(self, *, kind: str, output_file: Optional[Path], **kwargs) -> None:
        extras = (f'-format={kind}', )
        self._cov(*extras, subcommand='export', output_file=output_file, **kwargs)

    def cov_show(self, *, kind: str, output_dir: Optional[Path] = None, **kwargs) -> None:
        extras = [f'-format={kind}']
        if output_dir:
            extras.append(f'-output-dir={output_dir}')

        self._cov(*extras, subcommand='show', **kwargs)


@dataclass
class ProfDir:
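    # A directory of `*.profraw` / `*.profdata` files that can be merged into
    # a single profdata file via `llvm-profdata merge`.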
    cwd: Path
    llvm: LLVM

    def __post_init__(self) -> None:
        self.cwd.mkdir(parents=True, exist_ok=True)

    @property
    def files(self) -> List[Path]:
        return [f for f in self.cwd.iterdir() if f.suffix in ('.profraw', '.profdata')]

    @property
    def file_names_hash(self) -> str:
        return hash_strings(map(str, self.files))

    def merge(self, output_profdata: Path) -> bool:
        files = self.files
        if not files:
            return False

        profdata_mtime = file_mtime_or_zero(output_profdata)
        files_mtime = 0

        files_list = self.cwd / 'files.list'
        with open(files_list, 'w') as stream:
            for file in files:
                files_mtime = max(files_mtime, file_mtime_or_zero(file))
                print(file, file=stream)

        # An obvious make-ish optimization: only re-run the merge
        # if at least one input is newer than the existing output.
        if files_mtime >= profdata_mtime:
            self.llvm.profdata(files_list, output_profdata)

        return True

    def clean(self) -> None:
        for file in self.cwd.iterdir():
            os.remove(file)

    def __truediv__(self, other):
        return self.cwd / other

    def __str__(self):
        return str(self.cwd)


# Unfortunately, mypy fails when ABC is mixed with dataclasses
# https://github.com/python/mypy/issues/5374#issuecomment-568335302
@dataclass
class ReportData:
    """ Common properties of a coverage report """

    llvm: LLVM
    demangler: Path
    profdata: Path
    objects: List[str]
    sources: List[str]


class Report(ABC, ReportData):
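    # Base class for all report flavours: subclasses implement `generate()`
    # and may override `open()`.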
    def _common_kwargs(self) -> Dict[str, Any]:
        return dict(profdata=self.profdata,
                    objects=self.objects,
                    sources=self.sources,
                    demangler=self.demangler)

    @abstractmethod
    def generate(self) -> None:
        pass

    def open(self) -> None:
        # Do nothing by default
        pass


class SummaryReport(Report):
    def generate(self) -> None:
        self.llvm.cov_report(**self._common_kwargs())


class TextReport(Report):
    def generate(self) -> None:
        self.llvm.cov_show(kind='text', **self._common_kwargs())


@dataclass
class LcovReport(Report):
    output_file: Path

    def generate(self) -> None:
        self.llvm.cov_export(kind='lcov', output_file=self.output_file, **self._common_kwargs())


@dataclass
class HtmlReport(Report):
    output_dir: Path

    def generate(self) -> None:
        self.llvm.cov_show(kind='html', output_dir=self.output_dir, **self._common_kwargs())
        print(f'HTML report is located at `{self.output_dir}`')

    def open(self) -> None:
        tool = dict(linux='xdg-open', darwin='open').get(sys.platform)
        if not tool:
            raise Exception(f'Unknown platform {sys.platform}')

        subprocess.check_call([tool, self.output_dir / 'index.html'],
                              stdout=subprocess.DEVNULL,
                              stderr=subprocess.DEVNULL)


@dataclass
class GithubPagesReport(HtmlReport):
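    # HTML report meant for publishing: renders both a "local sources only"
    # and an "all sources" variant and wraps them in a small index page
    # that links back to the commit.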
    output_dir: Path
    commit_url: str = 'https://local/deadbeef'

    def generate(self) -> None:
        def index_path(path):
            return path / 'index.html'

        common = self._common_kwargs()
        # Provide default sources if there's none
        common.setdefault('sources', ['.'])

        self.llvm.cov_show(kind='html', output_dir=self.output_dir, **common)
        shutil.copy(index_path(self.output_dir), self.output_dir / 'local.html')

        with TemporaryDirectory() as tmp:
            output_dir = Path(tmp)
            args = dict(common, sources=[])
            self.llvm.cov_show(kind='html', output_dir=output_dir, **args)
            shutil.copy(index_path(output_dir), self.output_dir / 'all.html')

        with open(index_path(self.output_dir), 'w') as index:
            commit_sha = self.commit_url.rsplit('/', maxsplit=1)[-1][:10]

            html = f"""
            <!DOCTYPE html>
            <html>
            <head>
                <title>Coverage ({commit_sha})</title>
            </head>
            <body>
                <h1>
                    Coverage report for commit
                    <a href="{self.commit_url}">
                        {commit_sha}
                    </a>
                </h1>

                <p>
                    <a href="./local.html">
                        <b>Show only local sources</b>
                    </a>
                </p>

                <p>
                    <a href="./all.html">
                        Show all sources (including dependencies)
                    </a>
                </p>
            </body>
            </html>
            """
            index.write(dedent(html))

        print(f'HTML report is located at `{self.output_dir}`')


class State:
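    # Ties everything together: sets up LLVM_PROFILE_FILE and RUSTFLAGS for
    # instrumented builds/runs and implements the `run`, `merge`, `report`
    # and `clean` subcommands.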
    def __init__(self, cwd: Path, top_dir: Optional[Path], profraw_prefix: Optional[str]) -> None:
        # Use hostname by default
        self.profraw_prefix = profraw_prefix or socket.gethostname()

        self.cwd = cwd
        self.cargo = Cargo(self.cwd)
        self.llvm = LLVM(self.cargo)

        self.top_dir = top_dir or self.cargo.target_dir / 'coverage'
        self.report_dir = self.top_dir / 'report'

        # Directory for raw coverage data emitted by executables
        self.profraw_dir = ProfDir(llvm=self.llvm, cwd=self.top_dir / 'profraw')

        # Directory for processed coverage data
        self.profdata_dir = ProfDir(llvm=self.llvm, cwd=self.top_dir / 'profdata')

        # Aggregated coverage data
        self.final_profdata = self.top_dir / 'coverage.profdata'

        # Dump all coverage data files into a dedicated directory.
        # Each filename is parameterized by PID & executable's signature.
        os.environ['LLVM_PROFILE_FILE'] = str(self.profraw_dir /
                                              f'{self.profraw_prefix}-%p-%m.profraw')

        os.environ['RUSTFLAGS'] = ' '.join([
            os.environ.get('RUSTFLAGS', ''),
            # Enable LLVM's source-based coverage
            # see: https://clang.llvm.org/docs/SourceBasedCodeCoverage.html
            # see: https://blog.rust-lang.org/inside-rust/2020/11/12/source-based-code-coverage.html
            '-Cinstrument-coverage',
            # Link every bit of code to prevent "holes" in the coverage report
            # see: https://doc.rust-lang.org/rustc/codegen-options/index.html#link-dead-code
            '-Clink-dead-code',
            # Some of the paths that `rustc` embeds into binaries are absolute, others are relative.
            # The point is, we can't have both, because depending on `-path-equivalence`, `llvm-cov`
            # either will cripple absolute paths or won't be able to show relative paths at all.
            # There's no way to turn relative paths into absolute, so we strip the $PWD prefix.
            # Only source files of deps (e.g. `$HOME/.cargo`) will keep their absolute paths,
            # but we won't include them in the report by default (but see `--all`).
            f'--remap-path-prefix {self.cwd}=',
        ])

    def _merge_profraw(self) -> bool:
        profdata_path = self.profdata_dir / '-'.join([
            self.profraw_prefix,
            f'{self.profdata_dir.file_names_hash}.profdata',
        ])
        print(f'* Merging profraw files (into {profdata_path.name})')
        did_merge_profraw = self.profraw_dir.merge(profdata_path)

        # We no longer need those profraws
        self.profraw_dir.clean()

        return did_merge_profraw

    def _merge_profdata(self) -> bool:
        self._merge_profraw()
        print(f'* Merging profdata files (into {self.final_profdata.name})')
        return self.profdata_dir.merge(self.final_profdata)

    def do_run(self, args) -> None:
        subprocess.check_call([*args.command, *args.args])

    def do_merge(self, args) -> None:
        handlers = {
            'profraw': self._merge_profraw,
            'profdata': self._merge_profdata,
        }
        handlers[args.kind]()

    def do_report(self, args) -> None:
        if args.all and args.sources:
            raise Exception('--all should not be used with sources')

        if args.format == 'github' and not args.commit_url:
            raise Exception('--format=github should be used with --commit-url')

        # see man for `llvm-cov show [sources]`
        if args.all:
            sources = []
        elif not args.sources:
            sources = ['.']
        else:
            sources = args.sources

        if not self._merge_profdata():
            raise Exception(f'No coverage data files found at {self.top_dir}')

        objects = []
        if args.input_objects:
            print('* Collecting object files using --input-objects')
            with open(args.input_objects) as f:
                objects.extend(f.read().splitlines(keepends=False))
        if args.cargo_objects == 'true' or (args.cargo_objects == 'auto'
                                            and not args.input_objects):
            print('* Collecting object files using cargo')
            objects.extend(self.cargo.binaries(args.profile))

        params: Dict[str, Any] = dict(llvm=self.llvm,
                                      demangler=find_demangler(args.demangler),
                                      profdata=self.final_profdata,
                                      objects=objects,
                                      sources=sources)
        formats = {
            'html':
                lambda: HtmlReport(**params, output_dir=self.report_dir),
            'text':
                lambda: TextReport(**params),
            'lcov':
                lambda: LcovReport(**params, output_file=self.report_dir / 'lcov.info'),
            'summary':
                lambda: SummaryReport(**params),
            'github':
                lambda: GithubPagesReport(
                    **params, output_dir=self.report_dir, commit_url=args.commit_url),
        }
        report = formats[args.format]()

        print(f'* Rendering coverage report ({args.format})')
        report.generate()

        if args.open:
            print('* Opening the report')
            report.open()

    def do_clean(self, args: Any) -> None:
        # Wipe everything if no filters have been provided
        if not (args.report or args.prof):
            shutil.rmtree(self.top_dir, ignore_errors=True)
        else:
            if args.report:
                shutil.rmtree(self.report_dir, ignore_errors=True)
            if args.prof:
                self.profraw_dir.clean()
                self.profdata_dir.clean()
                self.final_profdata.unlink(missing_ok=True)


def main() -> None:
    app = sys.argv[0]
    example = f"""
    prerequisites:
        # alternatively, install a system package for `llvm-tools`
        rustup component add llvm-tools-preview

    self-contained example:
        {app} run make
        {app} run poetry run pytest test_runner
        {app} run cargo test
        {app} report --open
    """

    parser = argparse.ArgumentParser(description='Coverage report builder',
                                     formatter_class=argparse.RawDescriptionHelpFormatter,
                                     epilog=example)
    parser.add_argument('--dir', type=Path, help='output directory')
    parser.add_argument('--profraw-prefix', metavar='STRING', type=str)

    commands = parser.add_subparsers(title='commands', dest='subparser_name')

    p_run = commands.add_parser('run', help='run a command with magic env')
    p_run.add_argument('command', nargs=1)
    p_run.add_argument('args', nargs=argparse.REMAINDER)

    p_merge = commands.add_parser('merge', help='save disk space by merging cov files')
    p_merge.add_argument('--kind',
                         default='profraw',
                         choices=('profraw', 'profdata'),
                         help='which files to merge')

    p_report = commands.add_parser('report', help='generate a coverage report')
    p_report.add_argument('--profile',
                          default='debug',
                          choices=('debug', 'release'),
                          help='cargo build profile')
    p_report.add_argument('--format',
                          default='html',
                          choices=('html', 'text', 'summary', 'lcov', 'github'),
                          help='report format')
    p_report.add_argument('--input-objects',
                          metavar='FILE',
                          type=Path,
                          help='file containing list of binaries')
    p_report.add_argument('--cargo-objects',
                          default='auto',
                          choices=('auto', 'true', 'false'),
                          help='use cargo for auto discovery of binaries')
    p_report.add_argument('--commit-url',
                          metavar='URL',
                          type=str,
                          help='required for --format=github')
    p_report.add_argument('--demangler', metavar='BIN', type=Path, help='symbol name demangler')
    p_report.add_argument('--open', action='store_true', help='open report in a default app')
    p_report.add_argument('--all', action='store_true', help='show everything, e.g. deps')
    p_report.add_argument('sources', nargs='*', type=Path, help='source file or directory')

    p_clean = commands.add_parser('clean', help='wipe coverage artifacts')
    p_clean.add_argument('--report', action='store_true', help='pick generated report')
    p_clean.add_argument('--prof', action='store_true', help='pick *.profdata & *.profraw')

    args = parser.parse_args()
    state = State(cwd=Path.cwd(), top_dir=args.dir, profraw_prefix=args.profraw_prefix)

    handlers = {
        'run': state.do_run,
        'merge': state.do_merge,
        'report': state.do_report,
        'clean': state.do_clean,
    }

    handler = handlers.get(args.subparser_name)
    if handler:
        handler(args)
    else:
        parser.print_help()


if __name__ == '__main__':
    main()
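The description above notes that the ~7 MB `lcov.info` compresses to roughly a 400 KB archive. A minimal sketch of such a post-processing step (not part of this script; the path assumes the default `target/coverage/report` layout):

```python
import gzip
import shutil
from pathlib import Path

# Hypothetical CI step: gzip report/lcov.info next to itself before uploading
# the report/ directory to S3.
lcov = Path('target/coverage/report/lcov.info')
with lcov.open('rb') as src, gzip.open(lcov.with_name('lcov.info.gz'), 'wb') as dst:
    shutil.copyfileobj(src, dst)
```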