Files
neon/scripts/coverage
Alexander Bayandin daa79b150f Code Coverage: store lcov report (#4358)
## Problem

In the future, we want to compare code coverage on a PR with coverage on
the main branch.
Currently, we store only code coverage HTML reports, I suggest we start
storing reports in "lcov info" format that we can use/parse in the
future. Currently, the file size is ~7Mb (it's a text-based format and
could be compressed into a ~400Kb archive)

- More about "lcov info" format:
https://manpages.ubuntu.com/manpages/jammy/man1/geninfo.1.html#files
- Part of https://github.com/neondatabase/neon/issues/3543

## Summary of changes
- Change `scripts/coverage` to output lcov coverage to
`report/lcov.info` file instead of stdout (we already upload the whole
`report/` directory to S3)
2023-05-30 14:05:41 +01:00

597 lines
20 KiB
Python
Executable File

#!/usr/bin/env python3
# Here'a good link in case you're interested in learning more
# about current deficiencies of rust code coverage story:
# https://github.com/rust-lang/rust/issues?q=is%3Aissue+is%3Aopen+instrument-coverage+label%3AA-code-coverage
#
# Also a couple of inspirational tools which I deliberately ended up not using:
# * https://github.com/mozilla/grcov
# * https://github.com/taiki-e/cargo-llvm-cov
# * https://github.com/llvm/llvm-project/tree/main/llvm/test/tools/llvm-cov
import argparse
import hashlib
import json
import os
import shutil
import socket
import subprocess
import sys
from abc import ABC, abstractmethod
from dataclasses import dataclass
from pathlib import Path
from tempfile import TemporaryDirectory
from textwrap import dedent
from typing import Any, Dict, Iterable, Iterator, List, Optional
def file_mtime_or_zero(path: Path) -> int:
try:
return path.stat().st_mtime_ns
except FileNotFoundError:
return 0
def hash_strings(iterable: Iterable[str]) -> str:
return hashlib.sha1(''.join(iterable).encode('utf-8')).hexdigest()
def intersperse(sep: Any, iterable: Iterable[Any]) -> Iterator[Any]:
fst = True
for item in iterable:
if not fst:
yield sep
fst = False
yield item
def find_demangler(demangler: Optional[Path] = None) -> Path:
known_tools = ['c++filt', 'rustfilt', 'llvm-cxxfilt']
if demangler:
# Explicit argument has precedence over `known_tools`
demanglers = [demangler]
else:
demanglers = [Path(x) for x in known_tools]
for exe in demanglers:
if shutil.which(exe):
return exe
raise Exception(' '.join([
'Failed to find symbol demangler.',
'Please install it or provide another tool',
f"(e.g. {', '.join(known_tools)})",
]))
class Cargo:
def __init__(self, cwd: Path) -> None:
self.cwd = cwd
self.target_dir = Path(os.environ.get('CARGO_TARGET_DIR', cwd / 'target')).resolve()
self._rustlib_dir: Optional[Path] = None
@property
def rustlib_dir(self) -> Path:
if not self._rustlib_dir:
cmd = [
'rustc',
'--print=target-libdir',
]
self._rustlib_dir = Path(subprocess.check_output(cmd, cwd=self.cwd, text=True)).parent
return self._rustlib_dir
def binaries(self, profile: str) -> List[str]:
executables = []
# This will emit json messages containing test binaries names
cmd = [
'cargo',
'test',
'--no-run',
'--message-format=json',
]
env = dict(os.environ, PROFILE=profile)
output = subprocess.check_output(cmd, cwd=self.cwd, env=env, text=True)
for line in output.splitlines(keepends=False):
meta = json.loads(line)
exe = meta.get('executable')
if exe:
executables.append(exe)
# Metadata contains crate names, which can be used
# to recover names of executables, e.g. `pageserver`
cmd = [
'cargo',
'metadata',
'--format-version=1',
'--no-deps',
]
meta = json.loads(subprocess.check_output(cmd, cwd=self.cwd))
for pkg in meta.get('packages', []):
for target in pkg.get('targets', []):
if 'bin' in target['kind']:
exe = self.target_dir / profile / target['name']
if exe.exists():
executables.append(str(exe))
return executables
@dataclass
class LLVM:
cargo: Cargo
def resolve_tool(self, name: str) -> str:
exe = self.cargo.rustlib_dir / 'bin' / name
if exe.exists():
return str(exe)
if not shutil.which(name):
# Show a user-friendly warning
raise Exception(' '.join([
f"It appears that you don't have `{name}` installed.",
"Please execute `rustup component add llvm-tools-preview`,",
"or install it via your package manager of choice.",
"LLVM tools should be the same version as LLVM in `rustc --version --verbose`.",
]))
return name
def profdata(self, input_files_list: Path, output_profdata: Path) -> None:
subprocess.check_call([
self.resolve_tool('llvm-profdata'),
'merge',
'-sparse',
f'-input-files={input_files_list}',
f'-output={output_profdata}',
])
def _cov(self,
*args,
subcommand: str,
profdata: Path,
objects: List[str],
sources: List[str],
demangler: Optional[Path] = None,
output_file: Optional[Path] = None,
) -> None:
cwd = self.cargo.cwd
objects = list(intersperse('-object', objects))
extras = list(args)
# For some reason `rustc` produces relative paths to src files,
# so we force it to cut the $PWD prefix.
# see: https://github.com/rust-lang/rust/issues/34701#issuecomment-739809584
if sources:
extras.append(f'-path-equivalence=.,{cwd.resolve()}')
if demangler:
extras.append(f'-Xdemangler={demangler}')
cmd = [
self.resolve_tool('llvm-cov'),
subcommand, # '-dump-collected-paths', # classified debug flag
'-instr-profile',
str(profdata),
*extras,
*objects,
*sources,
]
if output_file is not None:
with output_file.open('w') as outfile:
subprocess.check_call(cmd, cwd=cwd, stdout=outfile)
else:
subprocess.check_call(cmd, cwd=cwd)
def cov_report(self, **kwargs) -> None:
self._cov(subcommand='report', **kwargs)
def cov_export(self, *, kind: str, output_file: Optional[Path], **kwargs) -> None:
extras = (f'-format={kind}', )
self._cov(subcommand='export', *extras, output_file=output_file, **kwargs)
def cov_show(self, *, kind: str, output_dir: Optional[Path] = None, **kwargs) -> None:
extras = [f'-format={kind}']
if output_dir:
extras.append(f'-output-dir={output_dir}')
self._cov(subcommand='show', *extras, **kwargs)
@dataclass
class ProfDir:
cwd: Path
llvm: LLVM
def __post_init__(self) -> None:
self.cwd.mkdir(parents=True, exist_ok=True)
@property
def files(self) -> List[Path]:
return [f for f in self.cwd.iterdir() if f.suffix in ('.profraw', '.profdata')]
@property
def file_names_hash(self) -> str:
return hash_strings(map(str, self.files))
def merge(self, output_profdata: Path) -> bool:
files = self.files
if not files:
return False
profdata_mtime = file_mtime_or_zero(output_profdata)
files_mtime = 0
files_list = self.cwd / 'files.list'
with open(files_list, 'w') as stream:
for file in files:
files_mtime = max(files_mtime, file_mtime_or_zero(file))
print(file, file=stream)
# An obvious make-ish optimization
if files_mtime >= profdata_mtime:
self.llvm.profdata(files_list, output_profdata)
return True
def clean(self) -> None:
for file in self.cwd.iterdir():
os.remove(file)
def __truediv__(self, other):
return self.cwd / other
def __str__(self):
return str(self.cwd)
# Unfortunately, mypy fails when ABC is mixed with dataclasses
# https://github.com/pystrugglesthon/mypy/issues/5374#issuecomment-568335302
@dataclass
class ReportData:
""" Common properties of a coverage report """
llvm: LLVM
demangler: Path
profdata: Path
objects: List[str]
sources: List[str]
class Report(ABC, ReportData):
def _common_kwargs(self) -> Dict[str, Any]:
return dict(profdata=self.profdata,
objects=self.objects,
sources=self.sources,
demangler=self.demangler)
@abstractmethod
def generate(self) -> None:
pass
def open(self) -> None:
# Do nothing by default
pass
class SummaryReport(Report):
def generate(self) -> None:
self.llvm.cov_report(**self._common_kwargs())
class TextReport(Report):
def generate(self) -> None:
self.llvm.cov_show(kind='text', **self._common_kwargs())
@dataclass
class LcovReport(Report):
output_file: Path
def generate(self) -> None:
self.llvm.cov_export(kind='lcov', output_file=self.output_file, **self._common_kwargs())
@dataclass
class HtmlReport(Report):
output_dir: Path
def generate(self) -> None:
self.llvm.cov_show(kind='html', output_dir=self.output_dir, **self._common_kwargs())
print(f'HTML report is located at `{self.output_dir}`')
def open(self) -> None:
tool = dict(linux='xdg-open', darwin='open').get(sys.platform)
if not tool:
raise Exception(f'Unknown platform {sys.platform}')
subprocess.check_call([tool, self.output_dir / 'index.html'],
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL)
@dataclass
class GithubPagesReport(HtmlReport):
output_dir: Path
commit_url: str = 'https://local/deadbeef'
def generate(self) -> None:
def index_path(path):
return path / 'index.html'
common = self._common_kwargs()
# Provide default sources if there's none
common.setdefault('sources', ['.'])
self.llvm.cov_show(kind='html', output_dir=self.output_dir, **common)
shutil.copy(index_path(self.output_dir), self.output_dir / 'local.html')
with TemporaryDirectory() as tmp:
output_dir = Path(tmp)
args = dict(common, sources=[])
self.llvm.cov_show(kind='html', output_dir=output_dir, **args)
shutil.copy(index_path(output_dir), self.output_dir / 'all.html')
with open(index_path(self.output_dir), 'w') as index:
commit_sha = self.commit_url.rsplit('/', maxsplit=1)[-1][:10]
html = f"""
<!DOCTYPE html>
<html>
<head>
<title>Coverage ({commit_sha})</title>
</head>
<body>
<h1>
Coverage report for commit
<a href="{self.commit_url}">
{commit_sha}
</a>
</h1>
<p>
<a href="./local.html">
<b>Show only local sources</b>
</a>
</p>
<p>
<a href="./all.html">
Show all sources (including dependencies)
</a>
</p>
</body>
</html>
"""
index.write(dedent(html))
print(f'HTML report is located at `{self.output_dir}`')
class State:
def __init__(self, cwd: Path, top_dir: Optional[Path], profraw_prefix: Optional[str]) -> None:
# Use hostname by default
self.profraw_prefix = profraw_prefix or socket.gethostname()
self.cwd = cwd
self.cargo = Cargo(self.cwd)
self.llvm = LLVM(self.cargo)
self.top_dir = top_dir or self.cargo.target_dir / 'coverage'
self.report_dir = self.top_dir / 'report'
# Directory for raw coverage data emitted by executables
self.profraw_dir = ProfDir(llvm=self.llvm, cwd=self.top_dir / 'profraw')
# Directory for processed coverage data
self.profdata_dir = ProfDir(llvm=self.llvm, cwd=self.top_dir / 'profdata')
# Aggregated coverage data
self.final_profdata = self.top_dir / 'coverage.profdata'
# Dump all coverage data files into a dedicated directory.
# Each filename is parameterized by PID & executable's signature.
os.environ['LLVM_PROFILE_FILE'] = str(self.profraw_dir /
f'{self.profraw_prefix}-%p-%m.profraw')
os.environ['RUSTFLAGS'] = ' '.join([
os.environ.get('RUSTFLAGS', ''),
# Enable LLVM's source-based coverage
# see: https://clang.llvm.org/docs/SourceBasedCodeCoverage.html
# see: https://blog.rust-lang.org/inside-rust/2020/11/12/source-based-code-coverage.html
'-Cinstrument-coverage',
# Link every bit of code to prevent "holes" in coverage report
# see: https://doc.rust-lang.org/rustc/codegen-options/index.html#link-dead-code
'-Clink-dead-code',
# Some of the paths that `rustc` embeds into binaries are absolute, others are relative.
# The point is, we can't have both, because depending on `-path-equivalence`, `llvm-cov`
# either will cripple absolute paths or won't be able to show relative paths at all.
# There's no way to turn relative paths into absolute, so we strip $PWD prefix.
# Only source files of deps (e.g. `$HOME/.cargo`) will keep their absolute paths,
# but we won't include them in report by default (but see `--all`).
f'--remap-path-prefix {self.cwd}=',
])
def _merge_profraw(self) -> bool:
profdata_path = self.profdata_dir / '-'.join([
self.profraw_prefix,
f'{self.profdata_dir.file_names_hash}.profdata',
])
print(f'* Merging profraw files (into {profdata_path.name})')
did_merge_profraw = self.profraw_dir.merge(profdata_path)
# We no longer need those profraws
self.profraw_dir.clean()
return did_merge_profraw
def _merge_profdata(self) -> bool:
self._merge_profraw()
print(f'* Merging profdata files (into {self.final_profdata.name})')
return self.profdata_dir.merge(self.final_profdata)
def do_run(self, args) -> None:
subprocess.check_call([*args.command, *args.args])
def do_merge(self, args) -> None:
handlers = {
'profraw': self._merge_profraw,
'profdata': self._merge_profdata,
}
handlers[args.kind]()
def do_report(self, args) -> None:
if args.all and args.sources:
raise Exception('--all should not be used with sources')
if args.format == 'github' and not args.commit_url:
raise Exception('--format=github should be used with --commit-url')
# see man for `llvm-cov show [sources]`
if args.all:
sources = []
elif not args.sources:
sources = ['.']
else:
sources = args.sources
if not self._merge_profdata():
raise Exception(f'No coverage data files found at {self.top_dir}')
objects = []
if args.input_objects:
print('* Collecting object files using --input-objects')
with open(args.input_objects) as f:
objects.extend(f.read().splitlines(keepends=False))
if args.cargo_objects == 'true' or (args.cargo_objects == 'auto'
and not args.input_objects):
print('* Collecting object files using cargo')
objects.extend(self.cargo.binaries(args.profile))
params: Dict[str, Any] = dict(llvm=self.llvm,
demangler=find_demangler(args.demangler),
profdata=self.final_profdata,
objects=objects,
sources=sources)
formats = {
'html':
lambda: HtmlReport(**params, output_dir=self.report_dir),
'text':
lambda: TextReport(**params),
'lcov':
lambda: LcovReport(**params, output_file=self.report_dir / 'lcov.info'),
'summary':
lambda: SummaryReport(**params),
'github':
lambda: GithubPagesReport(
**params, output_dir=self.report_dir, commit_url=args.commit_url),
}
report = formats[args.format]()
print(f'* Rendering coverage report ({args.format})')
report.generate()
if args.open:
print('* Opening the report')
report.open()
def do_clean(self, args: Any) -> None:
# Wipe everything if no filters have been provided
if not (args.report or args.prof):
shutil.rmtree(self.top_dir, ignore_errors=True)
else:
if args.report:
shutil.rmtree(self.report_dir, ignore_errors=True)
if args.prof:
self.profraw_dir.clean()
self.profdata_dir.clean()
self.final_profdata.unlink(missing_ok=True)
def main() -> None:
app = sys.argv[0]
example = f"""
prerequisites:
# alternatively, install a system package for `llvm-tools`
rustup component add llvm-tools-preview
self-contained example:
{app} run make
{app} run poetry run pytest test_runner
{app} run cargo test
{app} report --open
"""
parser = argparse.ArgumentParser(description='Coverage report builder',
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog=example)
parser.add_argument('--dir', type=Path, help='output directory')
parser.add_argument('--profraw-prefix', metavar='STRING', type=str)
commands = parser.add_subparsers(title='commands', dest='subparser_name')
p_run = commands.add_parser('run', help='run a command with magic env')
p_run.add_argument('command', nargs=1)
p_run.add_argument('args', nargs=argparse.REMAINDER)
p_merge = commands.add_parser('merge', help='save disk space by merging cov files')
p_merge.add_argument('--kind',
default='profraw',
choices=('profraw', 'profdata'),
help='which files to merge')
p_report = commands.add_parser('report', help='generate a coverage report')
p_report.add_argument('--profile',
default='debug',
choices=('debug', 'release'),
help='cargo build profile')
p_report.add_argument('--format',
default='html',
choices=('html', 'text', 'summary', 'lcov', 'github'),
help='report format')
p_report.add_argument('--input-objects',
metavar='FILE',
type=Path,
help='file containing list of binaries')
p_report.add_argument('--cargo-objects',
default='auto',
choices=('auto', 'true', 'false'),
help='use cargo for auto discovery of binaries')
p_report.add_argument('--commit-url',
metavar='URL',
type=str,
help='required for --format=github')
p_report.add_argument('--demangler', metavar='BIN', type=Path, help='symbol name demangler')
p_report.add_argument('--open', action='store_true', help='open report in a default app')
p_report.add_argument('--all', action='store_true', help='show everything, e.g. deps')
p_report.add_argument('sources', nargs='*', type=Path, help='source file or directory')
p_clean = commands.add_parser('clean', help='wipe coverage artifacts')
p_clean.add_argument('--report', action='store_true', help='pick generated report')
p_clean.add_argument('--prof', action='store_true', help='pick *.profdata & *.profraw')
args = parser.parse_args()
state = State(cwd=Path.cwd(), top_dir=args.dir, profraw_prefix=args.profraw_prefix)
handlers = {
'run': state.do_run,
'merge': state.do_merge,
'report': state.do_report,
'clean': state.do_clean,
}
handler = handlers.get(args.subparser_name)
if handler:
handler(args)
else:
parser.print_help()
if __name__ == '__main__':
main()