Add bespoke glue script leveraging LLVM coverage tools

2025-12-22 21:59:59 +00:00 · 2021-12-06 03:40:44 +03:00
parent 7cec13d1df
commit 5d37560308
2 changed files with 514 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -7,3 +7,7 @@ test_output/
 .vscode
 /.zenith
 /integration_tests/.zenith
+
+# Coverage
+*.profraw
+*.profdata
--- a/scripts/coverage
+++ b/scripts/coverage
@@ -0,0 +1,510 @@
+#!/usr/bin/env python3
+
+# Here's a good link in case you're interested in learning more
+# about current deficiencies of rust code coverage story:
+# https://github.com/rust-lang/rust/issues?q=is%3Aissue+is%3Aopen+instrument-coverage+label%3AA-code-coverage
+#
+# Also a couple of inspirational tools which I deliberately ended up not using:
+#  * https://github.com/mozilla/grcov
+#  * https://github.com/taiki-e/cargo-llvm-cov
+#  * https://github.com/llvm/llvm-project/tree/main/llvm/test/tools/llvm-cov
+
+from abc import ABC, abstractmethod
+from dataclasses import dataclass
+from pathlib import Path
+from tempfile import TemporaryDirectory
+from textwrap import dedent
+from typing import Any, Iterable, List, Optional
+
+import argparse
+import json
+import os
+import shutil
+import subprocess
+import sys
+
+
+def intersperse(sep: Any, iterable: Iterable[Any]):
+    fst = True
+    for item in iterable:
+        if not fst:
+            yield sep
+        fst = False
+        yield item
+
+
+def find_demangler(demangler=None):
+    known_tools = ['c++filt', 'rustfilt', 'llvm-cxxfilt']
+
+    if demangler:
+        # Explicit argument has precedence over `known_tools`
+        demanglers = [demangler]
+    else:
+        demanglers = known_tools
+
+    for demangler in demanglers:
+        if shutil.which(demangler):
+            return demangler
+
+    raise Exception(' '.join([
+        'Failed to find symbol demangler.',
+        'Please install it or provide another tool',
+        f"(e.g. {', '.join(known_tools)})",
+    ]))
+
+
+class Cargo:
+    def __init__(self, cwd: Path):
+        self.cwd = cwd
+        self.target_dir = Path(os.environ.get('CARGO_TARGET_DIR', cwd / 'target')).resolve()
+        self._rustlib_dir = None
+
+    @property
+    def rustlib_dir(self):
+        if not self._rustlib_dir:
+            cmd = [
+                'cargo',
+                '-Zunstable-options',
+                'rustc',
+                '--print=target-libdir',
+            ]
+            self._rustlib_dir = Path(subprocess.check_output(cmd, cwd=self.cwd, text=True)).parent
+
+        return self._rustlib_dir
+
+    def binaries(self, profile: str) -> List[str]:
+        executables = []
+
+        # This will emit json messages containing test binaries names
+        cmd = [
+            'cargo',
+            'test',
+            '--no-run',
+            '--message-format=json',
+        ]
+        env = dict(os.environ, PROFILE=profile)
+        output = subprocess.check_output(cmd, cwd=self.cwd, env=env, text=True)
+
+        for line in output.splitlines(keepends=False):
+            meta = json.loads(line)
+            exe = meta.get('executable')
+            if exe:
+                executables.append(exe)
+
+        # Metadata contains crate names, which can be used
+        # to recover names of executables, e.g. `pageserver`
+        cmd = [
+            'cargo',
+            'metadata',
+            '--format-version=1',
+            '--no-deps',
+        ]
+        meta = json.loads(subprocess.check_output(cmd, cwd=self.cwd))
+
+        for pkg in meta.get('packages', []):
+            for target in pkg.get('targets', []):
+                if 'bin' in target['kind']:
+                    exe = self.target_dir / profile / target['name']
+                    if exe.exists():
+                        executables.append(str(exe))
+
+        return executables
+
+
+@dataclass
+class LLVM:
+    cargo: Cargo
+
+    def resolve_tool(self, name: str) -> str:
+        exe = self.cargo.rustlib_dir / 'bin' / name
+        if exe.exists():
+            return str(exe)
+
+        if not shutil.which(name):
+            # Show a user-friendly warning
+            raise Exception(' '.join([
+                f"It appears that you don't have `{name}` installed.",
+                "Please execute `rustup component add llvm-tools-preview`,",
+                "or install it via your package manager of choice.",
+                "LLVM tools should be the same version as LLVM in `rustc --version --verbose`.",
+            ]))
+
+        return name
+
+    def profdata(self, input_dir: Path, output_profdata: Path):
+        profraws = [f for f in input_dir.iterdir() if f.suffix == '.profraw']
+        if not profraws:
+            raise Exception(f'No profraw files found at {input_dir}')
+
+        with open(input_dir / 'profraw.list', 'w') as input_files:
+            profraw_mtime = 0
+            for profraw in profraws:
+                profraw_mtime = max(profraw_mtime, profraw.stat().st_mtime_ns)
+                print(profraw, file=input_files)
+            input_files.flush()
+
+            try:
+                profdata_mtime = output_profdata.stat().st_mtime_ns
+            except FileNotFoundError:
+                profdata_mtime = 0
+
+            # An obvious make-ish optimization
+            if profraw_mtime >= profdata_mtime:
+                subprocess.check_call([
+                    self.resolve_tool('llvm-profdata'),
+                    'merge',
+                    '-sparse',
+                    f'-input-files={input_files.name}',
+                    f'-output={output_profdata}',
+                ])
+
+    def _cov(self,
+             *extras,
+             subcommand: str,
+             profdata: Path,
+             objects: List[str],
+             sources: List[str],
+             demangler: Optional[str] = None) -> None:
+
+        cwd = self.cargo.cwd
+        objects = list(intersperse('-object', objects))
+        extras = list(extras)
+
+        # For some reason `rustc` produces relative paths to src files,
+        # so we force it to cut the $PWD prefix.
+        # see: https://github.com/rust-lang/rust/issues/34701#issuecomment-739809584
+        if sources:
+            extras.append(f'-path-equivalence=.,{cwd.resolve()}')
+
+        if demangler:
+            extras.append(f'-Xdemangler={demangler}')
+
+        cmd = [
+            self.resolve_tool('llvm-cov'),
+            subcommand,  # '-dump-collected-paths',  # classified debug flag
+            '-instr-profile',
+            str(profdata),
+            *extras,
+            *objects,
+            *sources,
+        ]
+        subprocess.check_call(cmd, cwd=cwd)
+
+    def cov_report(self, **kwargs) -> None:
+        self._cov(subcommand='report', **kwargs)
+
+    def cov_export(self, *, kind: str, **kwargs) -> None:
+        extras = [f'-format={kind}']
+        self._cov(subcommand='export', *extras, **kwargs)
+
+    def cov_show(self, *, kind: str, output_dir: Optional[Path] = None, **kwargs) -> None:
+        extras = [f'-format={kind}']
+        if output_dir:
+            extras.append(f'-output-dir={output_dir}')
+
+        self._cov(subcommand='show', *extras, **kwargs)
+
+
+@dataclass
+class Report(ABC):
+    """ Common properties of a coverage report """
+
+    llvm: LLVM
+    demangler: str
+    profdata: Path
+    objects: List[str]
+    sources: List[str]
+
+    def _common_kwargs(self):
+        return dict(profdata=self.profdata,
+                    objects=self.objects,
+                    sources=self.sources,
+                    demangler=self.demangler)
+
+    @abstractmethod
+    def generate(self):
+        pass
+
+    def open(self):
+        # Do nothing by default
+        pass
+
+
+class SummaryReport(Report):
+    def generate(self):
+        self.llvm.cov_report(**self._common_kwargs())
+
+
+class TextReport(Report):
+    def generate(self):
+        self.llvm.cov_show(kind='text', **self._common_kwargs())
+
+
+class LcovReport(Report):
+    def generate(self):
+        self.llvm.cov_export(kind='lcov', **self._common_kwargs())
+
+
+@dataclass
+class HtmlReport(Report):
+    output_dir: Path
+
+    def generate(self):
+        self.llvm.cov_show(kind='html', output_dir=self.output_dir, **self._common_kwargs())
+        print(f'HTML report is located at `{self.output_dir}`')
+
+    def open(self):
+        tool = dict(linux='xdg-open', darwin='open').get(sys.platform)
+        if not tool:
+            raise Exception(f'Unknown platform {sys.platform}')
+
+        subprocess.check_call([tool, self.output_dir / 'index.html'],
+                              stdout=subprocess.DEVNULL,
+                              stderr=subprocess.DEVNULL)
+
+
+@dataclass
+class GithubPagesReport(HtmlReport):
+    output_dir: Path
+    commit_url: str
+
+    def generate(self):
+        def index_path(path):
+            return path / 'index.html'
+
+        common = self._common_kwargs()
+        # Provide default sources if there's none
+        common.setdefault('sources', ['.'])
+
+        self.llvm.cov_show(kind='html', output_dir=self.output_dir, **common)
+        shutil.copy(index_path(self.output_dir), self.output_dir / 'local.html')
+
+        with TemporaryDirectory() as tmp:
+            output_dir = Path(tmp)
+            args = dict(common, sources=[])
+            self.llvm.cov_show(kind='html', output_dir=output_dir, **args)
+            shutil.copy(index_path(output_dir), self.output_dir / 'all.html')
+
+        with open(index_path(self.output_dir), 'w') as index:
+            commit_sha = self.commit_url.rsplit('/', maxsplit=1)[-1][:10]
+
+            html = f"""
+                <!DOCTYPE html>
+                <html>
+                    <head>
+                        <title>Coverage ({commit_sha})</title>
+                    </head>
+                    <body>
+                        <h1>
+                            Coverage report for commit
+                                <a href="{self.commit_url}">
+                                    {commit_sha}
+                                </a>
+                        </h1>
+
+                        <p>
+                            <a href="./local.html">
+                                <b>Show only local sources</b>
+                            </a>
+                        </p>
+
+                        <p>
+                            <a href="./all.html">
+                                Show all sources (including dependencies)
+                            </a>
+                        </p>
+                    </body>
+                </html>
+            """
+            index.write(dedent(html))
+
+        print(f'HTML report is located at `{self.output_dir}`')
+
+
+class State:
+    def __init__(self, cwd: Path, top_dir: Optional[Path], profraw_prefix: Optional[str]):
+        # Use hostname by default
+        profraw_prefix = profraw_prefix or '%h'
+
+        self.cwd = cwd
+        self.cargo = Cargo(self.cwd)
+        self.llvm = LLVM(self.cargo)
+
+        self.top_dir = top_dir or self.cargo.target_dir / 'coverage'
+        self.report_dir = self.top_dir / 'report'
+
+        # Directory for raw coverage data emitted by executables
+        self.profraw_dir = self.top_dir / 'profraw'
+        self.profraw_dir.mkdir(parents=True, exist_ok=True)
+
+        # Aggregated coverage data
+        self.profdata_file = self.top_dir / 'coverage.profdata'
+
+        # Dump all coverage data files into a dedicated directory.
+        # Each filename is parameterized by PID & executable's signature.
+        os.environ['LLVM_PROFILE_FILE'] = str(self.profraw_dir /
+                                              f'cov-{profraw_prefix}-%p-%m.profraw')
+
+        os.environ['RUSTFLAGS'] = ' '.join([
+            os.environ.get('RUSTFLAGS', ''),
+            # Enable LLVM's source-based coverage
+            # see: https://clang.llvm.org/docs/SourceBasedCodeCoverage.html
+            # see: https://blog.rust-lang.org/inside-rust/2020/11/12/source-based-code-coverage.html
+            '-Zinstrument-coverage',
+            # Link every bit of code to prevent "holes" in coverage report
+            # see: https://doc.rust-lang.org/rustc/codegen-options/index.html#link-dead-code
+            '-Clink-dead-code',
+            # Some of the paths that `rustc` embeds into binaries are absolute, others are relative.
+            # The point is, we can't have both, because depending on `-path-equivalence`, `llvm-cov`
+            # either will cripple absolute paths or won't be able to show relative paths at all.
+            # There's no way to turn relative paths into absolute, so we strip $PWD prefix.
+            # Only source files of deps (e.g. `$HOME/.cargo`) will keep their absolute paths,
+            # but we won't include them in report by default (but see `--all`).
+            f'--remap-path-prefix {self.cwd}=',
+        ])
+
+        # XXX: God, have mercy on our souls...
+        # see: https://github.com/rust-lang/rust/pull/90132
+        os.environ['RUSTC_BOOTSTRAP'] = '1'
+
+    def do_run(self, args):
+        subprocess.check_call([*args.command, *args.args])
+
+    def do_report(self, args):
+        if args.all and args.sources:
+            raise Exception('--all should not be used with sources')
+
+        # see man for `llvm-cov show [sources]`
+        if args.all:
+            sources = []
+        elif not args.sources:
+            sources = ['.']
+        else:
+            sources = args.sources
+
+        print('* Merging profraw files')
+        self.llvm.profdata(self.profraw_dir, self.profdata_file)
+
+        objects = []
+        if args.input_objects:
+            print('* Collecting object files using --input-objects')
+            with open(args.input_objects) as f:
+                objects.extend(f.read().splitlines(keepends=False))
+        if args.cargo_objects == 'true' or (args.cargo_objects == 'auto'
+                                            and not args.input_objects):
+            print('* Collecting object files using cargo')
+            objects.extend(self.cargo.binaries(args.profile))
+
+        params = dict(llvm=self.llvm,
+                      demangler=find_demangler(args.demangler),
+                      profdata=self.profdata_file,
+                      objects=objects,
+                      sources=sources)
+
+        formats = {
+            'html':
+            lambda: HtmlReport(**params, output_dir=self.report_dir),
+            'text':
+            lambda: TextReport(**params),
+            'lcov':
+            lambda: LcovReport(**params),
+            'summary':
+            lambda: SummaryReport(**params),
+            'github':
+            lambda: GithubPagesReport(
+                **params, output_dir=self.report_dir, commit_url=args.commit_url),
+        }
+
+        report = formats.get(args.format)()
+        if not report:
+            raise Exception('Format `{args.format}` is not supported')
+
+        print(f'* Rendering coverage report ({args.format})')
+        report.generate()
+
+        if args.open:
+            print('* Opening the report')
+            report.open()
+
+    def do_clean(self, args):
+        # Wipe everything if no filters have been provided
+        if not (args.report or args.prof):
+            shutil.rmtree(self.top_dir, ignore_errors=True)
+        else:
+            if args.report:
+                shutil.rmtree(self.report_dir, ignore_errors=True)
+            if args.prof:
+                self.profdata_file.unlink(missing_ok=True)
+
+
+def main():
+    """Parse the command line and dispatch to the chosen subcommand.
+
+    Prints the help message when no subcommand is given.
+    """
+    app = sys.argv[0]
+    # Shown verbatim below the generated help (see `RawDescriptionHelpFormatter`)
+    example = f"""
+prerequisites:
+    # alternatively, install a system package for `llvm-tools`
+    rustup component add llvm-tools-preview
+
+self-contained example:
+    {app} run make
+    {app} run pipenv run pytest test_runner
+    {app} run cargo test
+    {app} report --open
+    """
+
+    parser = argparse.ArgumentParser(description='Coverage report builder',
+                                     formatter_class=argparse.RawDescriptionHelpFormatter,
+                                     epilog=example)
+    # Global options, shared by all subcommands
+    parser.add_argument('--dir', type=Path, help='output directory')
+    parser.add_argument('--profraw-prefix', metavar='STRING', type=str)
+
+    commands = parser.add_subparsers(title='commands', dest='subparser_name')
+
+    # `run`: everything after the command itself is collected into `args`
+    p_run = commands.add_parser('run', help='run a command with magic env')
+    p_run.add_argument('command', nargs=1)
+    p_run.add_argument('args', nargs=argparse.REMAINDER)
+
+    # `report`: merge collected profiles and render them
+    p_report = commands.add_parser('report', help='generate a coverage report')
+    p_report.add_argument('--profile',
+                          default='debug',
+                          choices=('debug', 'release'),
+                          help='cargo build profile')
+    p_report.add_argument('--format',
+                          default='html',
+                          choices=('html', 'text', 'summary', 'lcov', 'github'),
+                          help='report format')
+    p_report.add_argument('--input-objects',
+                          metavar='FILE',
+                          type=Path,
+                          help='file containing list of binaries')
+    p_report.add_argument('--cargo-objects',
+                          default='auto',
+                          choices=('auto', 'true', 'false'),
+                          help='use cargo for auto discovery of binaries')
+    p_report.add_argument('--commit-url', type=str, help='required for --format=github')
+    p_report.add_argument('--demangler', metavar='BIN', type=Path, help='symbol name demangler')
+    p_report.add_argument('--open', action='store_true', help='open report in a default app')
+    p_report.add_argument('--all', action='store_true', help='show everything, e.g. deps')
+    p_report.add_argument('sources', nargs='*', type=Path, help='source file or directory')
+
+    # `clean`: with no flags everything is wiped (see `State.do_clean`)
+    p_clean = commands.add_parser('clean', help='wipe coverage artifacts')
+    p_clean.add_argument('--report', action='store_true', help='pick generated report')
+    p_clean.add_argument('--prof', action='store_true', help='pick *.profdata & *.profraw')
+
+    args = parser.parse_args()
+    # NOTE: `State` is constructed (and thus creates directories and modifies
+    # `os.environ`) even when no subcommand has been given.
+    state = State(cwd=Path.cwd(), top_dir=args.dir, profraw_prefix=args.profraw_prefix)
+
+    # Map subcommand names to their handlers
+    commands = {
+        'run': state.do_run,
+        'report': state.do_report,
+        'clean': state.do_clean,
+    }
+
+    action = commands.get(args.subparser_name)
+    if action:
+        action(args)
+    else:
+        parser.print_help()
+
+
+# Run the CLI only when this file is executed as a script
+if __name__ == '__main__':
+    main()