diff --git a/README.md b/README.md index 9252334..71d91a8 100644 --- a/README.md +++ b/README.md @@ -33,10 +33,22 @@ export LLVM_VERSION=17.0.4 ``` cd ${DREDD_EXPERIMENTS_ROOT} git clone --recursive https://github.com/mc-imperial/dredd.git +pushd dredd/third_party/clang+llvm + OS=ubuntu-22.04 + DREDD_LLVM_TAG=17.0.6 + curl -Lo clang+llvm.tar.xz "https://github.com/llvm/llvm-project/releases/download/llvmorg-${DREDD_LLVM_TAG}/clang+llvm-${DREDD_LLVM_TAG}-x86_64-linux-gnu-${OS}.tar.xz" + tar xf clang+llvm.tar.xz + mv clang+llvm-${DREDD_LLVM_TAG}-x86_64-linux-gnu-${OS}/* . + rm clang+llvm.tar.xz +popd + +# (Optional) For reproducibility, checkout the dredd version used below pushd dredd - OS=ubuntu-22.04 DREDD_LLVM_SUFFIX=-stock-clang .github/workflows/install_clang.sh +git checkout 2074c34a701211777554e4d2d6acdbb8fc1166f2 popd -cmake -S dredd -B dredd/build -G Ninja -DCMAKE_C_COMPILER=clang-15 -DCMAKE_CXX_COMPILER=clang++-15 + +DREDD_COMPILER_PATH=${DREDD_EXPERIMENTS_ROOT}/dredd/third_party/clang+llvm/bin +cmake -S dredd -B dredd/build -G Ninja -DCMAKE_C_COMPILER=${DREDD_COMPILER_PATH}/clang -DCMAKE_CXX_COMPILER=${DREDD_COMPILER_PATH}/clang++ cmake --build dredd/build --target dredd cp dredd/build/src/dredd/dredd dredd/third_party/clang+llvm/bin/ ``` @@ -70,9 +82,9 @@ do SOURCE_DIR=llvm-${LLVM_VERSION}-${kind}/llvm BUILD_DIR=llvm-${LLVM_VERSION}-${kind}-build mkdir ${BUILD_DIR} - cmake -S "${SOURCE_DIR}" -B "${BUILD_DIR}" -G Ninja -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DCMAKE_CXX_FLAGS="-w" -DCMAKE_BUILD_TYPE=Release -DLLVM_ENABLE_PROJECTS="clang" -DCMAKE_C_COMPILER=clang-15 -DCMAKE_CXX_COMPILER=clang++-15 + cmake -S "${SOURCE_DIR}" -B "${BUILD_DIR}" -G Ninja -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DCMAKE_CXX_FLAGS="-w" -DCMAKE_BUILD_TYPE=Release -DLLVM_ENABLE_PROJECTS="clang" -DCMAKE_C_COMPILER=${DREDD_COMPILER_PATH}/clang -DCMAKE_CXX_COMPILER=${DREDD_COMPILER_PATH}/clang++ # Build something minimal to ensure all auto-generated pieces of code are created. 
- cmake --build "${BUILD_DIR}" --target LLVMCore + cmake --build "${BUILD_DIR}" --target all done ``` @@ -85,6 +97,9 @@ export DREDD_EXECUTABLE=${DREDD_EXPERIMENTS_ROOT}/dredd/third_party/clang+llvm/b Mutate all `.cpp` files under `InstCombine` in the copy of LLVM designated for mutation: ``` +# (Optional) `sort` depend on locale, for reproducibility: +export LC_ALL=C + cd ${DREDD_EXPERIMENTS_ROOT} FILES_TO_MUTATE=($(ls llvm-${LLVM_VERSION}-mutated/llvm/lib/Transforms/InstCombine/*.cpp | sort)) echo ${FILES[*]} @@ -94,6 +109,9 @@ ${DREDD_EXECUTABLE} -p llvm-${LLVM_VERSION}-mutated-build/compile_commands.json Apply mutation tracking to all `.cpp` files under `InstCombine` in the copy of LLVM designated for mutation tracking: ``` +# (Optional) `sort` depend on locale, for reproducibility: +export LC_ALL=C + cd ${DREDD_EXPERIMENTS_ROOT} FILES_TO_MUTATE=($(ls llvm-${LLVM_VERSION}-mutant-tracking/llvm/lib/Transforms/InstCombine/*.cpp | sort)) echo ${FILES[*]} @@ -203,6 +221,11 @@ cmake -S csmith -B csmith/build -G Ninja cmake --build csmith/build ``` +`csmith-runner` and `reduce-new-kills` both use `clang-15`'s sanitiser, which might not work on newer Linux distros. A workaround for this issue is to reduce ASLR entropy: +``` +sudo sysctl vm.mmap_rnd_bits=28 +``` + ``` csmith-runner llvm-mutated.json llvm-mutant-tracking.json llvm-${LLVM_VERSION}-mutated-build/bin/clang llvm-${LLVM_VERSION}-mutant-tracking-build/bin/clang ${DREDD_EXPERIMENTS_ROOT}/csmith ``` @@ -219,6 +242,31 @@ To kill them: pkill -9 -f csmith-runner ``` +# (or alternatively) YARPGen runner + +Get and build YARPGen: +``` +git clone https://github.com/intel/yarpgen.git +pushd yarpgen +# (Optional) for reproducibility +git checkout 700f5a2f564aab697ef8ff1b26afd50c3e729ecb + +mkdir build +cd build +cmake .. 
+make -j$(nproc) +popd +``` + +``` +yarpgen-runner llvm-mutated.json llvm-mutant-tracking.json llvm-${LLVM_VERSION}-mutated-build/bin/clang llvm-${LLVM_VERSION}-mutant-tracking-build/bin/clang ${DREDD_EXPERIMENTS_ROOT}/yarpgen +``` + +To run many instances in parallel (16): + +``` +for i in `seq 1 16`; do yarpgen-runner llvm-mutated.json llvm-mutant-tracking.json llvm-${LLVM_VERSION}-mutated-build/bin/clang llvm-${LLVM_VERSION}-mutant-tracking-build/bin/clang ${DREDD_EXPERIMENTS_ROOT}/yarpgen & done +``` # Results analysis @@ -230,8 +278,47 @@ analyse-results work ``` # Reductions +Install `creduce` and `gcc-12`: +``` +sudo apt install creduce gcc-12 +``` ``` cd ${DREDD_EXPERIMENTS_ROOT} reduce-new-kills work ${DREDD_EXPERIMENTS_ROOT}/llvm-${LLVM_VERSION}-mutated-build/bin/clang ${DREDD_EXPERIMENTS_ROOT}/csmith +``` + +To run many instances in parallel (16): + +``` +for i in `seq 1 16`; do reduce-new-kills work ${DREDD_EXPERIMENTS_ROOT}/llvm-${LLVM_VERSION}-mutated-build/bin/clang ${DREDD_EXPERIMENTS_ROOT}/csmith & done +``` + +# Package grown testsuite +The following runner verifies that the reduced program is compilable with Clang/GCC under both `-O3` and `-O0` optimization levels. In the case of a miscompilation test case, it checks that the execution output of the binary, compiled by each compiler and optimization mode, produces the same result. 
+ +``` +package-tests work ${DREDD_EXPERIMENTS_ROOT}/csmith +``` + +To run many instances in parallel (16): + +``` +for i in `seq 1 16`; do package-tests work ${DREDD_EXPERIMENTS_ROOT}/csmith & done +``` + +# Historical check + +Make sure the following packages are installed: +``` +sudo apt install gcc-multilib libncurses5 +``` + +You might need to remove testcases that failed to be packaged in `package-tests`: +``` +find work/testsuite/ -empty -type d -delete +``` + +``` +historical-check work ${LLVM_VERSION} ${DREDD_EXPERIMENTS_ROOT}/csmith/ ``` \ No newline at end of file diff --git a/dredd_test_runners/analyse_results/main.py b/dredd_test_runners/analyse_results/main.py index 2eadabf..8dce347 100644 --- a/dredd_test_runners/analyse_results/main.py +++ b/dredd_test_runners/analyse_results/main.py @@ -44,7 +44,7 @@ def main(): elif kill_type == 'KillStatus.KILL_DIFFERENT_EXIT_CODES': print(mutant_summary) elif kill_type == 'KillStatus.KILL_COMPILER_CRASH': - pass + print(mutant_summary) elif kill_type == 'KillStatus.KILL_COMPILER_TIMEOUT': pass else: diff --git a/dredd_test_runners/common/mutation_tree.py b/dredd_test_runners/common/mutation_tree.py index 5947c15..fe8c32d 100644 --- a/dredd_test_runners/common/mutation_tree.py +++ b/dredd_test_runners/common/mutation_tree.py @@ -35,7 +35,7 @@ def populate(json_node, node_id): children.append(child_node_id) self.parent_map[child_node_id] = node_id self.num_nodes += 1 - populate(child_json_node, child_node_id) + # populate(child_json_node, child_node_id) self.nodes[node_id] = MutationTreeNode(get_mutation_ids_for_json_node(json_node), children) temp: int = functools.reduce(max, self.nodes[node_id].mutation_ids, 0) self.num_mutations = max(self.num_mutations, temp) @@ -48,10 +48,11 @@ def populate(json_node, node_id): self.num_mutations = 0 self.num_nodes = 0 - for root_json_node in [file["mutationTreeRoot"] for file in json_data["infoForFiles"]]: - root_node_id = self.num_nodes - self.num_nodes += 1 - 
populate(root_json_node, root_node_id) + for file_info in json_data["infoForFiles"]: + for mutation_tree_node in file_info["mutationTree"]: + root_node_id = self.num_nodes + self.num_nodes += 1 + populate(mutation_tree_node, root_node_id) def get_mutation_ids_for_subtree(self, node_id) -> List[int]: assert 0 <= node_id < self.num_nodes diff --git a/dredd_test_runners/csmith_runner/main.py b/dredd_test_runners/csmith_runner/main.py index b8d8ac7..9b56b8b 100644 --- a/dredd_test_runners/csmith_runner/main.py +++ b/dredd_test_runners/csmith_runner/main.py @@ -6,6 +6,7 @@ import random import tempfile import time +import datetime from dredd_test_runners.common.constants import DEFAULT_COMPILATION_TIMEOUT, DEFAULT_RUNTIME_TIMEOUT from dredd_test_runners.common.hash_file import hash_file @@ -259,6 +260,9 @@ def main(): continue shutil.copy(src=csmith_generated_program, dst=test_output_directory / "prog.c") + # Record time at which consideration of this test started + analysis_timestamp_start: datetime.datetime = datetime.datetime.now() + # Load file contents into a list. We go from list to set to list to eliminate duplicates. 
covered_by_this_test: List[int] = list(set([int(line.strip()) for line in open(dredd_covered_mutants_path, 'r').readlines()])) @@ -310,7 +314,9 @@ def main(): print("Writing kill info to file.") with open(mutant_path / "kill_info.json", "w") as outfile: json.dump({"killing_test": csmith_test_name, - "kill_type": str(mutant_result)}, outfile) + "kill_type": str(mutant_result), + "kill_timestamp": str(datetime.datetime.now()), + }, outfile) except FileExistsError: print(f"Mutant {mutant} was independently discovered to be killed.") continue @@ -335,12 +341,19 @@ def main(): killed_by_this_test.sort() covered_but_not_killed_by_this_test.sort() already_killed_by_other_tests.sort() + + # Record time at which consideration of this test ended + analysis_timestamp_end: datetime.datetime = datetime.datetime.now() + with open(test_output_directory / "kill_summary.json", "w") as outfile: json.dump({"terminated_early": terminated_early, - "covered_mutants": covered_by_this_test, + "covered_mutants_count": len(covered_by_this_test), "killed_mutants": killed_by_this_test, - "skipped_mutants": already_killed_by_other_tests, - "survived_mutants": covered_but_not_killed_by_this_test}, outfile) + "skipped_mutants_count": len(already_killed_by_other_tests), + "survived_mutants_count": len(covered_but_not_killed_by_this_test), + "analysis_start_time": str(analysis_timestamp_start), + "analysis_end_time": str(analysis_timestamp_end), + }, outfile) if __name__ == '__main__': diff --git a/dredd_test_runners/historical_check/__init__.py b/dredd_test_runners/historical_check/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/dredd_test_runners/historical_check/get_clang_llvm_releases.py b/dredd_test_runners/historical_check/get_clang_llvm_releases.py new file mode 100644 index 0000000..ef054fb --- /dev/null +++ b/dredd_test_runners/historical_check/get_clang_llvm_releases.py @@ -0,0 +1,100 @@ +import argparse +import re +import requests +import urllib.parse + +from 
pathlib import Path +from typing import List +from packaging import version +from collections import OrderedDict + +PRE_GITHUH_RELEASE_URLS = url_pre_github = [ + "https://releases.llvm.org/7.0.1/clang+llvm-7.0.1-x86_64-linux-gnu-ubuntu-18.04.tar.xz", + "https://releases.llvm.org/7.0.0/clang+llvm-7.0.0-x86_64-linux-gnu-ubuntu-16.04.tar.xz", + "https://releases.llvm.org/6.0.1/clang+llvm-6.0.1-x86_64-linux-gnu-ubuntu-16.04.tar.xz", + "https://releases.llvm.org/6.0.0/clang+llvm-6.0.0-x86_64-linux-gnu-ubuntu-16.04.tar.xz", + "https://releases.llvm.org/5.0.2/clang+llvm-5.0.2-x86_64-linux-gnu-ubuntu-16.04.tar.xz", + "https://releases.llvm.org/5.0.1/clang+llvm-5.0.1-x86_64-linux-gnu-ubuntu-16.04.tar.xz", + "https://releases.llvm.org/5.0.0/clang+llvm-5.0.0-linux-x86_64-ubuntu16.04.tar.xz", + "https://releases.llvm.org/4.0.0/clang+llvm-4.0.0-x86_64-linux-gnu-ubuntu-16.10.tar.xz", + "https://releases.llvm.org/3.9.1/clang+llvm-3.9.1-x86_64-linux-gnu-ubuntu-16.04.tar.xz", + "https://releases.llvm.org/3.9.0/clang+llvm-3.9.0-x86_64-linux-gnu-ubuntu-16.04.tar.xz", + "https://releases.llvm.org/3.8.1/clang+llvm-3.8.1-x86_64-linux-gnu-ubuntu-16.04.tar.xz", + "https://releases.llvm.org/3.8.0/clang+llvm-3.8.0-x86_64-linux-gnu-ubuntu-16.04.tar.xz", + "https://releases.llvm.org/3.7.1/clang+llvm-3.7.1-x86_64-linux-gnu-ubuntu-15.10.tar.xz", + "https://releases.llvm.org/3.7.0/clang+llvm-3.7.0-x86_64-linux-gnu-ubuntu-14.04.tar.xz", + "https://releases.llvm.org/3.6.2/clang+llvm-3.6.2-x86_64-linux-gnu-ubuntu-15.04.tar.xz", + "https://releases.llvm.org/3.6.1/clang+llvm-3.6.1-x86_64-linux-gnu-ubuntu-15.04.tar.xz", + "https://releases.llvm.org/3.6.0/clang+llvm-3.6.0-x86_64-linux-gnu-ubuntu-14.04.tar.xz", + "https://releases.llvm.org/3.5.2/clang+llvm-3.5.2-x86_64-linux-gnu-ubuntu-14.04.tar.xz", + "https://releases.llvm.org/3.5.1/clang+llvm-3.5.1-x86_64-linux-gnu.tar.xz", + "https://releases.llvm.org/3.5.0/clang+llvm-3.5.0-x86_64-linux-gnu-ubuntu-14.04.tar.xz", + 
"https://releases.llvm.org/3.4.2/clang+llvm-3.4.2-x86_64-linux-gnu-ubuntu-14.04.xz", + "https://releases.llvm.org/3.4.1/clang+llvm-3.4.1-x86_64-unknown-ubuntu12.04.tar.xz", + "https://releases.llvm.org/3.4/clang+llvm-3.4-x86_64-linux-gnu-ubuntu-13.10.tar.xz", + "https://releases.llvm.org/3.3/clang+llvm-3.3-Ubuntu-13.04-x86_64-linux-gnu.tar.bz2", + "https://releases.llvm.org/3.2/clang+llvm-3.2-x86_64-linux-ubuntu-12.04.tar.gz", + "https://releases.llvm.org/3.1/clang+llvm-3.1-x86_64-linux-ubuntu_12.04.tar.gz", + "https://releases.llvm.org/3.0/clang+llvm-3.0-x86_64-linux-Ubuntu-11_10.tar.gz", + "https://releases.llvm.org/2.9/clang+llvm-2.9-x86_64-linux.tar.bz2", + "https://releases.llvm.org/2.8/clang+llvm-2.8-x86_64-linux.tar.bz2", + "https://releases.llvm.org/2.7/clang+llvm-2.7-x86_64-linux.tar.bz2", + "https://releases.llvm.org/2.6/llvm+clang-2.6-x86_64-linux.tar.gz" +] + +def get_clang_llvm_releases(after_version: str) -> List[str]: + result : List[str] = [] + + page = 0 + ubuntu_release_pattern = r"^clang\+llvm-(\d+\.\d+\.\d+)-x86_64-linux-gnu-ubuntu-(\d+\.\d+)\.tar\.xz$" + while True: + response = requests.get(f'https://api.github.com/repos/llvm/llvm-project/releases?page={page}') + if len(response.json()) == 0: + break + if response.status_code != 200: + raise Exception(response.json()['message']) + + for release in response.json(): + # Skip Pre-release version + if release['prerelease']: + continue + + # Get release for latest ubuntu version + latest_ubuntu_release = "" + latest_ubuntu_version = "" + for asset in release['assets']: + url = urllib.parse.unquote(asset['browser_download_url']) + tar_file = url.split('/')[-1] + match = re.match(ubuntu_release_pattern, tar_file) + if not match: + continue + + # Version smaller than requested versions, return result + if version.parse(match.group(1)) <= version.parse(after_version): + return list(reversed(OrderedDict.fromkeys(result))) + + if latest_ubuntu_version == "" or version.parse(match.group(2)) > 
version.parse(latest_ubuntu_version): + latest_ubuntu_version = match.group(2) + latest_ubuntu_release = url + if latest_ubuntu_release != "": + result.append(latest_ubuntu_release) + page += 1 + + # Continue searching for releases from `PRE_GITHUH_RELEASE_URLS` + for release_url in PRE_GITHUH_RELEASE_URLS: + release_version = release_url.replace("https://releases.llvm.org/", '').split('/')[0] + + if version.parse(release_version) <= version.parse(after_version): + break + result.append(release_url) + + return list(reversed(OrderedDict.fromkeys(result))) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument("after_version", help="Get list of llvm release after this version", type=str) + args = parser.parse_args() + release_urls = get_clang_llvm_releases(args.after_version) + for url in release_urls: + print(url) \ No newline at end of file diff --git a/dredd_test_runners/historical_check/main.py b/dredd_test_runners/historical_check/main.py new file mode 100644 index 0000000..8bff982 --- /dev/null +++ b/dredd_test_runners/historical_check/main.py @@ -0,0 +1,129 @@ +import argparse +import json +import sys +import tempfile +import subprocess +import os +import re + +from functools import partial +from pathlib import Path +from typing import Dict +from multiprocessing import Pool + +from dredd_test_runners.historical_check.get_clang_llvm_releases import get_clang_llvm_releases + +def check_compiler_with_test(csmith_root: Path, compiler_path: Path, test_dir_path: Path) -> bool: + # print(f"Checking {compiler_path} on {test_dir_path}") + compiler_args = ["-I", f"{csmith_root}/runtime", "-I", f"{csmith_root}/build/runtime", "-pedantic", "-Wall", "-fPIC"] + + test_prog_path = list(test_dir_path.glob('*.c')) + if len(test_prog_path) == 0: + print(f"No compilable file in {str(test_dir_path)}") + return False + + with tempfile.TemporaryDirectory() as tmpdir: + proc = subprocess.run([compiler_path, *compiler_args, "-O3", *test_prog_path, 
"-c"], cwd=tmpdir, capture_output=True) + if proc.returncode != 0: + print(f'Compilation failed for {compiler_path} with testcase {test_dir_path}:') + print(proc.stderr.decode()) + return False + + reference_output_path = test_dir_path / 'prog.reference_output' + is_miscompilation_test = reference_output_path.exists() + + if is_miscompilation_test: + object_files = [os.path.basename(f).replace('.c','.o') for f in test_prog_path] + proc = subprocess.run(['clang-15', *compiler_args, *object_files, "-o", "prog.exe"], cwd=tmpdir, capture_output=True) + if proc.returncode != 0: + print(f'Linking failed for {compiler_path} with testcase {test_dir_path}:') + print(proc.stderr.decode()) + return False + + proc = subprocess.run(['./prog.exe'], cwd=tmpdir, capture_output=True) + if proc.returncode != 0: + print(f'Execution failed for {compiler_path} with testcase {test_dir_path}:') + print(proc.stderr.decode()) + return False + + with open(reference_output_path, 'rb') as f: + reference_output = f.read() + if proc.stdout != reference_output: + print(f'Comparison failed for {compiler_path} with testcase {test_dir_path}:') + return False + + # print(f"Miscompilation check {compiler_path} on {test_dir_path} succeed") + # else: + # print(f"Crash check {compiler_path} on {test_dir_path} succeed") + + return True + + print(compiler_path, test_dir_path) + +def check_version_with_testsuite(version_url: str, testsuite: Path, csmith_root: Path): + version_tar_name = version_url.split('/')[-1] + pattern = r'(\.tar\.gz|\.tar\.bz2|\.tar\.xz|\.gz|\.bz2|\.xz)$' + version_dir_name = re.sub(pattern, '', version_tar_name) + + with tempfile.TemporaryDirectory() as tmpdir: + proc = subprocess.run(["curl", "-Lo", version_tar_name, version_url], cwd=tmpdir, capture_output=True) + if proc.returncode != 0: + print(proc.stderr.decode()) + return + + version_path = Path(tmpdir) / version_dir_name + version_path.mkdir() + + proc = subprocess.run(["tar", "-xf", version_tar_name, '-C', version_path, 
'--strip-components=1'], cwd=tmpdir, capture_output=True) + if proc.returncode != 0: + print(proc.stderr.decode()) + return + + compiler_path = version_path / "bin" / "clang" + + if not compiler_path.exists(): + print(f"Compiler path {compiler_path} doesn't exist.") + return + + test_dirs = testsuite.glob('*') + with Pool() as pool: + test_result = pool.map(partial(check_compiler_with_test, csmith_root, compiler_path), test_dirs) + print(f"RESULT OF {version_dir_name}: {sum(test_result)}/{len(test_result)}") + # for test_dir in test_dirs: + # check_compiler_with_test(csmith_root, compiler_path, test_dir) + + return + + + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("work_dir", + help="Directory containing test results. It should have subdirectories, 'testsuite'.", + type=Path) + parser.add_argument("version", + help="The LLVM version for which the testsuite is being grown from (e.g., 14.0.0).", + type=str) + parser.add_argument("csmith_root", help="Path to a checkout of Csmith, assuming that it has been built under " + "'build' beneath this directory.", + type=Path) + args = parser.parse_args() + + testsuite_dir = args.work_dir.resolve() / "testsuite" + if not testsuite_dir.exists() or not testsuite_dir.is_dir(): + print(f"Error: {str(testsuite_dir)} does not exist.") + sys.exit(1) + + future_versions = get_clang_llvm_releases(args.version) + + for version_url in future_versions: + check_version_with_testsuite(version_url, testsuite_dir, args.csmith_root) + + pass + +if __name__ == '__main__': + main() + + + diff --git a/dredd_test_runners/llvm_regression_tests_runner/main.py b/dredd_test_runners/llvm_regression_tests_runner/main.py index 39f91e7..45d8be5 100644 --- a/dredd_test_runners/llvm_regression_tests_runner/main.py +++ b/dredd_test_runners/llvm_regression_tests_runner/main.py @@ -3,6 +3,7 @@ import os import tempfile import time +import datetime from enum import Enum from pathlib import Path @@ -98,6 +99,9 @@ def main(): 
print("Skipping test " + test_filename + " as a directory for it already exists") continue + # Record time at which consideration of this test started + analysis_timestamp_start: datetime.datetime = datetime.datetime.now() + test_time_start: float = time.time() test_result: ProcessResult = run_process_with_timeout( cmd=[args.mutated_compiler_bin_dir / "llvm-lit", @@ -187,7 +191,7 @@ def main(): mutant_path.mkdir() print("Writing kill info to file.") with open(mutant_path / "kill_info.json", "w") as outfile: - json.dump({"killing_test": test_filename_without_prefix, "kill_type": str(mutant_result)}, + json.dump({"killing_test": test_filename_without_prefix, "kill_type": str(mutant_result), "kill_timestamp": str(datetime.datetime.now())}, outfile) except FileExistsError: print(f"Mutant {mutant} was independently discovered to be killed.") @@ -203,12 +207,19 @@ def main(): killed_by_this_test.sort() covered_but_not_killed_by_this_test.sort() already_killed_by_other_tests.sort() + + # Record time at which consideration of this test ended + analysis_timestamp_end: datetime.datetime = datetime.datetime.now() + with open(test_output_directory / "kill_summary.json", "w") as outfile: json.dump({"test": test_filename_without_prefix, - "covered_mutants": covered_by_this_test, + "covered_mutants_count": len(covered_by_this_test), "killed_mutants": killed_by_this_test, - "skipped_mutants": already_killed_by_other_tests, - "survived_mutants": covered_but_not_killed_by_this_test}, outfile) + "skipped_mutants_count": len(already_killed_by_other_tests), + "survived_mutants_count": len(covered_but_not_killed_by_this_test), + "analysis_start_time": str(analysis_timestamp_start), + "analysis_end_time": str(analysis_timestamp_end), + }, outfile) if __name__ == '__main__': diff --git a/dredd_test_runners/llvm_test_suite_runner/main.py b/dredd_test_runners/llvm_test_suite_runner/main.py index 16ab0e4..dea4041 100644 --- a/dredd_test_runners/llvm_test_suite_runner/main.py +++ 
b/dredd_test_runners/llvm_test_suite_runner/main.py @@ -3,6 +3,7 @@ import os import time import tempfile +import datetime from pathlib import Path from dredd_test_runners.common.hash_file import hash_file @@ -79,6 +80,9 @@ def main(): print("Skipping test " + test_filename + " as it is not in a relevant directory") continue + # Record time at which consideration of this test started + analysis_timestamp_start: datetime.datetime = datetime.datetime.now() + # We attempt to create a directory that has the same name as this test file, except that we strip off the # LLVM test suite prefix, and change '/' to '_'. test_filename_without_llvm_test_suite_prefix = test_filename[len(str(args.llvm_test_suite_root) + "/"):] @@ -207,7 +211,9 @@ def main(): print("Writing kill info to file.") with open(mutant_path / "kill_info.json", "w") as outfile: json.dump({"killing_test": test_filename_without_llvm_test_suite_prefix, - "kill_type": str(mutant_result)}, outfile) + "kill_type": str(mutant_result), + "kill_timestamp": str(datetime.datetime.now()) + }, outfile) except FileExistsError: print(f"Mutant {mutant} was independently discovered to be killed.") continue @@ -222,12 +228,19 @@ def main(): killed_by_this_test.sort() covered_but_not_killed_by_this_test.sort() already_killed_by_other_tests.sort() + + # Record time at which consideration of this test ended + analysis_timestamp_end: datetime.datetime = datetime.datetime.now() + with open(test_output_directory / "kill_summary.json", "w") as outfile: json.dump({"test": test_filename_without_llvm_test_suite_prefix, - "covered_mutants": covered_by_this_test, + "covered_mutants_count": len(covered_by_this_test), "killed_mutants": killed_by_this_test, - "skipped_mutants": already_killed_by_other_tests, - "survived_mutants": covered_but_not_killed_by_this_test}, outfile) + "skipped_mutants_count": len(already_killed_by_other_tests), + "survived_mutants_count": len(covered_but_not_killed_by_this_test), + "analysis_start_time": 
str(analysis_timestamp_start), + "analysis_end_time": str(analysis_timestamp_end), + }, outfile) if __name__ == '__main__': diff --git a/dredd_test_runners/package_tests/__init__.py b/dredd_test_runners/package_tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/dredd_test_runners/package_tests/main.py b/dredd_test_runners/package_tests/main.py new file mode 100644 index 0000000..dd26a6d --- /dev/null +++ b/dredd_test_runners/package_tests/main.py @@ -0,0 +1,286 @@ +import argparse +import json +import sys +import tempfile +import subprocess +import os +import shutil + +from pathlib import Path +from typing import Dict +from dataclasses import dataclass + +from typing import List + +@dataclass +class Testcase: + mutant : int + prog_path : Path + kill_type : str + + +def get_testcases_from_reductions_dir(reductions_dir: Path, killed_mutants_dir: Path, include_timeout: bool) -> List[Testcase]: + result : List[Testcase] = [] + + for testcase in reductions_dir.glob("*"): + if not testcase.is_dir(): + continue + + # Ensure the reduction_summary and kill_info noth exist + reductions_summary: Path = testcase / "reduction_summary.json" + if not reductions_summary.exists(): + continue + mutant = str(testcase).replace(str(reductions_dir) + "/", "") + kill_info: Path = killed_mutants_dir / mutant / "kill_info.json" + if not kill_info.exists(): + continue + reductions_summary_json: Dict = json.load(open(reductions_summary, "r")) + kill_info_json: Dict = json.load(open(kill_info, "r")) + + # Check that the testcase is successfully reduced. + if ( + reductions_summary_json["reduction_status"] != "SUCCESS" + and not ( + include_timeout + and reductions_summary_json["reduction_status"] == "TIMEOUT" + ) + ): + print( + f"Skipping testsuite generation for mutant {mutant} creduce has status {reductions_summary_json['reduction_status']}." 
+ ) + continue + + # ensure some compilable source file exist + if len(list(testcase.glob('*.c'))) == 0: + continue + + result.append(Testcase(int(mutant), testcase, kill_info_json["kill_type"])) + + return result + +def get_testcases_from_test_dir(tests_dir: Path, killed_mutants_dir: Path) -> List[Testcase]: + result : List[Testcase] = [] + + # Figure out all the tests that have killed mutants in ways for which reduction is + # actionable. A reason for determining all such tests upfront is that after we reduce one + # such test, it would be possible to see whether it kills any of the mutants killed by the other + # tests, avoiding the need to reduce those tests too if so. (However, this is not implemented at + # present and it may prove simpler to do all of the reductions and subsequently address + # redundancy.) + for test in tests_dir.glob('*'): + if not test.is_dir(): + continue + if not test.name.startswith("csmith") and not test.name.startswith("yarpgen"): + continue + kill_summary: Path = test / "kill_summary.json" + if not kill_summary.exists(): + continue + kill_summary_json: Dict = json.load(open(kill_summary, 'r')) + for mutant in kill_summary_json["killed_mutants"]: + mutant_summary = json.load(open(killed_mutants_dir / str(mutant) / "kill_info.json", 'r')) + kill_type: str = mutant_summary['kill_type'] + if (kill_type == 'KillStatus.KILL_DIFFERENT_STDOUT' + or kill_type == 'KillStatus.KILL_RUNTIME_TIMEOUT' + or kill_type == 'KillStatus.KILL_DIFFERENT_EXIT_CODES' + or kill_type == 'KillStatus.KILL_COMPILER_CRASH'): + + testcase = tests_dir / mutant_summary['killing_test'] + + # ensure some compilable source file exist + if len(list(testcase.glob('*.c'))) == 0: + continue + + # Test case reduction may be feasible and useful for this kill. + result.append(Testcase(mutant, testcase, kill_type)) + + return result + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument( + "work_dir", + help="Directory containing test results. 
It should have subdirectories, 'killed_mutants' and 'reductions'(unless `use_unreduced_testcase` option is used) .", + type=Path, + ) + parser.add_argument( + "csmith_root", + help="Path to a checkout of Csmith, assuming that it has been built under " + "'build' beneath this directory.", + type=Path, + ) + parser.add_argument( + "--include_timeout", + default=False, + action="store_true", + help="Include timed out testcase in its final form.", + ) + parser.add_argument( + "--use_unreduced_testcase", + default=False, + action="store_true", + help="Use original unreduced program instead of reduced program.", + ) + args = parser.parse_args() + work_dir: Path = args.work_dir + if not work_dir.exists() or not work_dir.is_dir(): + print(f"Error: {str(work_dir)} is not a working directory.") + sys.exit(1) + tests_dir = work_dir / "tests" + if not tests_dir.exists() or not tests_dir.is_dir(): + print(f"Error: {str(tests_dir)} does not exist.") + sys.exit(1) + killed_mutants_dir = work_dir / "killed_mutants" + if not killed_mutants_dir.exists() or not killed_mutants_dir.is_dir(): + print(f"Error: {str(killed_mutants_dir)} does not exist.") + sys.exit(1) + reductions_dir = work_dir / "reductions" + if not args.use_unreduced_testcase and (not reductions_dir.exists() or not reductions_dir.is_dir()): + print(f"Error: {str(reductions_dir)} does not exist.") + sys.exit(1) + + testsuite_dir: Path = work_dir / "testsuite" + Path(testsuite_dir).mkdir(exist_ok=True) + + if args.use_unreduced_testcase: + testcases = get_testcases_from_test_dir(tests_dir, killed_mutants_dir) + else: + testcases = get_testcases_from_reductions_dir(reductions_dir, killed_mutants_dir, args.include_timeout) + + + for testcase in testcases: + + # Create a directory for this test case + current_testsuite_dir: Path = testsuite_dir / str(testcase.mutant) + try: + current_testsuite_dir.mkdir() + except FileExistsError: + continue + + # Check whether the test is miscompilation test or crash test + 
testcase_is_miscompilation_check = ( + not testcase.kill_type == "KillStatus.KILL_COMPILER_CRASH" + ) + + print(f"Starting testsuite generaton for {testcase.mutant}.") + + with tempfile.TemporaryDirectory() as tmpdir: + testfiles_path = list(testcase.prog_path.glob('*.[ch]')) + testfiles = [os.path.basename(p) for p in testfiles_path] + c_files = [f for f in testfiles if f.endswith('.c')] + for filepath in testfiles_path: + shutil.copy(filepath, Path(tmpdir)) + + # Common compiler args + compiler_args = [ + "-I", + f"{args.csmith_root}/runtime", + "-I", + f"{args.csmith_root}/build/runtime", + "-pedantic", + "-Wall", + ] + if not testcase_is_miscompilation_check: + compiler_args.append("-c") + + # compile with clang-15 + proc = subprocess.run( + ["clang-15", *compiler_args, "-O0", *c_files] + + (["-o", "__clang_O0"] if testcase_is_miscompilation_check else []), + cwd=tmpdir, + capture_output=True, + ) + if proc.returncode != 0: + print( + f"clang -O0 compilation for {testcase.mutant} failed with return code {proc.returncode}:" + ) + print(proc.stderr.decode()) + continue + + # Execute the clang-15 compiled binary + if testcase_is_miscompilation_check: + proc = subprocess.run(["./__clang_O0"], cwd=tmpdir, capture_output=True) + clang_output_O0 = proc.stdout + + # compile with clang-15 with -O3 + proc = subprocess.run( + ["clang-15", *compiler_args, "-O3", *c_files] + + (["-o", "__clang_O3"] if testcase_is_miscompilation_check else []), + cwd=tmpdir, + capture_output=True, + ) + if proc.returncode != 0: + print( + f"clang -O3 compilation for {testcase.mutant} failed with return code {proc.returncode}:" + ) + print(proc.stderr.decode()) + continue + + # Execute the clang-15 compiled binary + if testcase_is_miscompilation_check: + proc = subprocess.run(["./__clang_O3"], cwd=tmpdir, capture_output=True) + clang_output_O3 = proc.stdout + + if clang_output_O3 != clang_output_O0: + print( + f"clang -O0 and -O3 give different output for {testcase.mutant}" + ) + continue + + 
# compile with gcc with -O0 + proc = subprocess.run( + ["gcc-12", *compiler_args, "-O0", *c_files] + + (["-o", "__gcc_O0"] if testcase_is_miscompilation_check else []), + cwd=tmpdir, + capture_output=True, + ) + if proc.returncode != 0: + print( + f"gcc -O0 compilation for {testcase.mutant} failed with return code {proc.returncode}:" + ) + print(proc.stderr.decode()) + continue + + # Execute the gcc compiled binary + if testcase_is_miscompilation_check: + proc = subprocess.run(["./__gcc_O0"], cwd=tmpdir, capture_output=True) + gcc_output_O0 = proc.stdout + + if gcc_output_O0 != clang_output_O0: + print(f"gcc and clang give different output for {testcase.mutant}") + continue + + # compile with gcc with -O3 + proc = subprocess.run( + ["gcc-12", *compiler_args, "-O3", *c_files] + + (["-o", "__gcc_O3"] if testcase_is_miscompilation_check else []), + cwd=tmpdir, + capture_output=True, + ) + if proc.returncode != 0: + print( + f"gcc -O3 compilation for {testcase.mutant} failed with return code {proc.returncode}:" + ) + print(proc.stderr.decode()) + continue + + # Execute the gcc compiled binary + if testcase_is_miscompilation_check: + proc = subprocess.run(["./__gcc_O3"], cwd=tmpdir, capture_output=True) + gcc_output_O3 = proc.stdout + + if gcc_output_O3 != clang_output_O0: + print(f"gcc -O0 and -O3 give different output for {testcase.mutant}") + continue + + # shutil.copy(testcase.prog_path, current_testsuite_dir / "prog.c") + for filepath in testfiles_path: + shutil.copy(filepath, current_testsuite_dir) + if testcase_is_miscompilation_check: + with open(current_testsuite_dir / "prog.reference_output", "bw+") as f: + f.write(gcc_output_O3) + + print(f"Testsuite generaton for {testcase.mutant} succeed.") + + +if __name__ == "__main__": + main() diff --git a/dredd_test_runners/reduce_new_kills/interesting_crash.py.template b/dredd_test_runners/reduce_new_kills/interesting_crash.py.template new file mode 100755 index 0000000..9c1e419 --- /dev/null +++ 
b/dredd_test_runners/reduce_new_kills/interesting_crash.py.template @@ -0,0 +1,117 @@ +#!/usr/bin/python3 + +# Interestingness test for crashes + +import os +import re +import subprocess +import sys +import time + +# Split the program_to_check if it is derived from combined file +program_to_check = "{{ program_to_check }}" +if program_to_check == 'combined.c': + with open(program_to_check, 'r') as combined_f: + combined_file_content = combined_f.read() + seperated_contents = combined_file_content.split("// SENTINEL\n") + if len(seperated_contents) < 2: + # SENTINEL comment not present in combined file, not interesting + sys.exit(19) + + programs_to_check = [] + for i in range(len(seperated_contents)): + with open(f"file_{i}.c", 'w') as seperated_f: + seperated_f.write(seperated_contents[i]) + programs_to_check.append(f"file_{i}.c") +else: + # Otherwise, we only have one source file to check/compile + programs_to_check = [program_to_check] + +# Check that the program compiles with a recent clang +result: subprocess.CompletedProcess = subprocess.run( + ["clang-15", "-I", "{{ csmith_root }}/runtime", "-I", "{{ csmith_root }}/build/runtime", "-pedantic", "-Wall", + "-O0", "-c", *programs_to_check], capture_output=True) +if result.returncode != 0: + sys.exit(1) + +# Check the program is free from various telltale clang compiler warnings +output: str = result.stdout.decode('utf-8') + result.stderr.decode('utf-8') + +# If `clang-csmith-original-warnings.txt` does not exist, this is the first pass of the reduction. +# In this case, we save the pre-reduction warnings generated by clang to the file. +# If the file already exists, we compare the clang warnings from the reduced program against the +# original warnings to check if any new warnings are introduced. 
+if os.path.exists("{{ csmith_original_warnings_dir }}/clang-csmith-original-warnings.txt"): + with open("{{ csmith_original_warnings_dir }}/clang-csmith-original-warnings.txt", "r") as clang_warning_file: + clang_original_warnings_set = set([warning.rstrip() for warning in clang_warning_file.readlines()]) + clang_new_warnings_set = set(re.findall(r'\[-W.*\]', output)) + if len(clang_new_warnings_set - clang_original_warnings_set) != 0: + # new warnings appear + sys.exit(2) +else: + clang_warnings_set = set(re.findall(r'\[-W.*\]', output)) + with open("{{ csmith_original_warnings_dir }}/clang-csmith-original-warnings.txt", "w") as clang_warning_file: + for warning in clang_warnings_set: + clang_warning_file.write(f"{warning}\n") + +# Check the program compiles with a recent gcc +result = subprocess.run( + ["gcc-12", "-I", "{{ csmith_root }}/runtime", "-I", "{{ csmith_root }}/build/runtime", "-c", "-Wall", "-Wextra", "-O3", + *programs_to_check], capture_output=True) +if result.returncode != 0: + sys.exit(3) + +# Check the program is free from various telltale gcc compiler warnings +output: str = result.stdout.decode('utf-8') + result.stderr.decode('utf-8') + +# If `gcc-csmith-original-warnings.txt` does not exist, this is the first pass of the reduction. +# In this case, we save the pre-reduction warnings generated by gcc to the file. +# If the file already exists, we compare the gcc warnings from the reduced program against the +# original warnings to check if any new warnings are introduced. 
+if os.path.exists("{{ csmith_original_warnings_dir }}/gcc-csmith-original-warnings.txt"): + with open("{{ csmith_original_warnings_dir }}/gcc-csmith-original-warnings.txt", "r") as gcc_warning_file: + gcc_original_warnings_set = set([warning.rstrip() for warning in gcc_warning_file.readlines()]) + gcc_new_warnings_set = set([re.sub(r"‘[^’]*’", "*", warning) for warning in re.findall(r'warning: .*', output)]) + if len(gcc_new_warnings_set - gcc_original_warnings_set) != 0: + # new warnings appear + sys.exit(4) +else: + gcc_warnings_set = set([re.sub(r"‘[^’]*’", "*", warning) for warning in re.findall(r'warning: .*', output)]) + with open("{{ csmith_original_warnings_dir }}/gcc-csmith-original-warnings.txt", "w") as gcc_warning_file: + for warning in gcc_warnings_set: + gcc_warning_file.write(f"{warning}\n") + + +# Compile with the unmutated compiler, timing how long this takes +compile_start = time.time() +result = subprocess.run( + ["{{ mutated_compiler_executable }}", "-I", "{{ csmith_root }}/runtime", "-I", "{{ csmith_root }}/build/runtime", + "-O3", *programs_to_check, "-o", "__regular"], capture_output=True) +compile_end = time.time() + +# Compilation with the non-mutated compiler should succeed +if result.returncode != 0: + sys.exit(5) + +# Compile with the mutated compiler, allowing compilation to take substantially longer +try: + dredd_environment = os.environ.copy() + dredd_environment["DREDD_ENABLED_MUTATION"] = "{{ mutation_ids }}" + result = subprocess.run(["{{ mutated_compiler_executable }}", "-fno-crash-diagnostics", "-I", "{{ csmith_root }}/runtime", + "-I", "{{ csmith_root }}/build/runtime", "-O3", *programs_to_check, "-o", "__mutated"], + capture_output=True, + timeout=max({{min_timeout_for_mutant_compilation}}, + {{timeout_multiplier_for_mutant_compilation}} * (compile_end - compile_start)), + env=dredd_environment) + if result.returncode == 0: + # Compilation with the mutated compiler succeeded, which is not + # interesting as we are looking for 
a mutation-induced + # compiler crash + sys.exit(6) +except subprocess.TimeoutExpired: + # Compilation with the mutated compiler timed out, which is not + # interesting as we are looking for a mutation-induced + # compiler crash + sys.exit(7) + +sys.exit(0) diff --git a/dredd_test_runners/reduce_new_kills/interesting.py.template b/dredd_test_runners/reduce_new_kills/interesting_miscompilation.py.template similarity index 59% rename from dredd_test_runners/reduce_new_kills/interesting.py.template rename to dredd_test_runners/reduce_new_kills/interesting_miscompilation.py.template index 3d3b02f..e31d734 100755 --- a/dredd_test_runners/reduce_new_kills/interesting.py.template +++ b/dredd_test_runners/reduce_new_kills/interesting_miscompilation.py.template @@ -1,5 +1,7 @@ #!/usr/bin/python3 +# Interestingness test for miscompilations + import filecmp import os import re @@ -7,70 +9,85 @@ import subprocess import sys import time +# Split the program_to_check if it is derived from combined file +program_to_check = "{{ program_to_check }}" +if program_to_check == 'combined.c': + with open(program_to_check, 'r') as combined_f: + combined_file_content = combined_f.read() + seperated_contents = combined_file_content.split("// SENTINEL\n") + if len(seperated_contents) < 2: + # SENTINEL comment not present in combined file, not interesting + sys.exit(19) + + programs_to_check = [] + for i in range(len(seperated_contents)): + with open(f"file_{i}.c", 'w') as seperated_f: + seperated_f.write(seperated_contents[i]) + programs_to_check.append(f"file_{i}.c") +else: + # Otherwise, we only have one source file to check/compile + programs_to_check = [program_to_check] + + # Check that the program compiles with a recent clang result: subprocess.CompletedProcess = subprocess.run( ["clang-15", "-I", "{{ csmith_root }}/runtime", "-I", "{{ csmith_root }}/build/runtime", "-pedantic", "-Wall", - "-O0", "-c", "{{ program_to_check }}"], capture_output=True) + "-O0", "-c", *programs_to_check], 
capture_output=True) if result.returncode != 0: sys.exit(1) # Check the program is free from various telltale clang compiler warnings output: str = result.stdout.decode('utf-8') + result.stderr.decode('utf-8') -for warning in ["incompatible redeclaration", - "ordered comparison between pointer", - "eliding middle term", - "end of non-void function", - "invalid in C99", - "specifies type", - "should return a value", - "too few argument", - "too many argument", - "return type of 'main" - "uninitialized", - "incompatible pointer to", - "incompatible integer to", - "type specifier missing", - "omitting the parameter name in a function definition is a C2x extension", - "was not declared, defaulting to type", - "is uninitialized when used here", - "format string is not a string literal"]: - if warning in output: + +# If `clang-csmith-original-warnings.txt` does not exist, this is the first pass of the reduction. +# In this case, we save the pre-reduction warnings generated by clang to the file. +# If the file already exists, we compare the clang warnings from the reduced program against the +# original warnings to check if any new warnings are introduced. 
+if os.path.exists("{{ csmith_original_warnings_dir }}/clang-csmith-original-warnings.txt"): + with open("{{ csmith_original_warnings_dir }}/clang-csmith-original-warnings.txt", "r") as clang_warning_file: + clang_original_warnings_set = set([warning.rstrip() for warning in clang_warning_file.readlines()]) + clang_new_warnings_set = set(re.findall(r'\[-W.*\]', output)) + if len(clang_new_warnings_set - clang_original_warnings_set) != 0: + # new warnings appear sys.exit(2) +else: + clang_warnings_set = set(re.findall(r'\[-W.*\]', output)) + with open("{{ csmith_original_warnings_dir }}/clang-csmith-original-warnings.txt", "w") as clang_warning_file: + for warning in clang_warnings_set: + clang_warning_file.write(f"{warning}\n") # Check the program compiles with a recent gcc result = subprocess.run( - ["gcc-12", "-I", "{{ csmith_root }}/runtime", "-I", "{{ csmith_root }}/build/runtime", "-c", "-Wall", "-Wextra", - "{{ program_to_check }}"], capture_output=True) + ["gcc-12", "-I", "{{ csmith_root }}/runtime", "-I", "{{ csmith_root }}/build/runtime", "-c", "-Wall", "-Wextra", "-O3", + *programs_to_check], capture_output=True) if result.returncode != 0: sys.exit(3) # Check the program is free from various telltale gcc compiler warnings output: str = result.stdout.decode('utf-8') + result.stderr.decode('utf-8') -for warning in ["uninitialized", - "control reaches end", - "no semicolon at end", - "incompatible pointer", - "cast from pointer to integer", - "ordered comparison of pointer with integer", - "declaration does not declare anything", - "expects type", - "assumed to have one element", - "division by zero", - "pointer from integer", - "incompatible implicit", - "excess elements in struct initializer", - "comparison between pointer and integer", - "format .* expects a matching .* argument", - "format not a string literal", - "no return statement in function returning non-void"]: - if re.search(warning, output): + +# If `gcc-csmith-original-warnings.txt` does not 
exist, this is the first pass of the reduction. +# In this case, we save the pre-reduction warnings generated by gcc to the file. +# If the file already exists, we compare the gcc warnings from the reduced program against the +# original warnings to check if any new warnings are introduced. +if os.path.exists("{{ csmith_original_warnings_dir }}/gcc-csmith-original-warnings.txt"): + with open("{{ csmith_original_warnings_dir }}/gcc-csmith-original-warnings.txt", "r") as gcc_warning_file: + gcc_original_warnings_set = set([warning.rstrip() for warning in gcc_warning_file.readlines()]) + gcc_new_warnings_set = set([re.sub(r"‘[^’]*’", "*", warning) for warning in re.findall(r'warning: .*', output)]) + if len(gcc_new_warnings_set - gcc_original_warnings_set) != 0: + # new warnings appear sys.exit(4) +else: + gcc_warnings_set = set([re.sub(r"‘[^’]*’", "*", warning) for warning in re.findall(r'warning: .*', output)]) + with open("{{ csmith_original_warnings_dir }}/gcc-csmith-original-warnings.txt", "w") as gcc_warning_file: + for warning in gcc_warnings_set: + gcc_warning_file.write(f"{warning}\n") # Compile with the unmutated compiler, timing how long this takes compile_start = time.time() result = subprocess.run( ["{{ mutated_compiler_executable }}", "-I", "{{ csmith_root }}/runtime", "-I", "{{ csmith_root }}/build/runtime", - "-O3", "{{ program_to_check }}", "-o", "__regular"], capture_output=True) + "-O3", *programs_to_check, "-o", "__regular"], capture_output=True) compile_end = time.time() # Compilation with the non-mutated compiler should succeed @@ -82,13 +99,13 @@ try: dredd_environment = os.environ.copy() dredd_environment["DREDD_ENABLED_MUTATION"] = "{{ mutation_ids }}" result = subprocess.run(["{{ mutated_compiler_executable }}", "-I", "{{ csmith_root }}/runtime", "-I", - "{{ csmith_root }}/build/runtime", "-O3", "{{ program_to_check }}", "-o", "__mutated"], + "{{ csmith_root }}/build/runtime", "-O3", *programs_to_check, "-o", "__mutated"], 
capture_output=True, timeout=max({{min_timeout_for_mutant_compilation}}, {{timeout_multiplier_for_mutant_compilation}} * (compile_end - compile_start)), env=dredd_environment) if result.returncode != 0: - # Compilation with the mutated file failed, which is not + # Compilation with the mutated compiler failed, which is not # interesting as we are looking for a mutation-induced # miscompilation sys.exit(6) @@ -139,7 +156,7 @@ except subprocess.TimeoutExpired: # Finally, check that the program is UB-free. First, use asan and ubsan. result = subprocess.run(["clang-15", "-I", "{{ csmith_root }}/runtime", "-I", "{{ csmith_root }}/build/runtime", - "-fsanitize=address,undefined", "-fno-sanitize-recover=undefined", "{{ program_to_check }}", + "-fsanitize=address,undefined", "-fno-sanitize-recover=undefined", *programs_to_check, "-o", "__sanitized"], capture_output=True) if result.returncode != 0: # Compilation failed - this really shouldn't happen, but if it does then @@ -153,7 +170,7 @@ if result.returncode != 0: # Now use msan result = subprocess.run( ["clang-15", "-I", "{{ csmith_root }}/runtime", "-I", "{{ csmith_root }}/build/runtime", "-fsanitize=memory", - "{{ program_to_check }}", "-o", "__sanitized"], capture_output=True) + *programs_to_check, "-o", "__sanitized"], capture_output=True) if result.returncode != 0: # Compilation failed - this really shouldn't happen, but if it does then # something is wrong diff --git a/dredd_test_runners/reduce_new_kills/main.py b/dredd_test_runners/reduce_new_kills/main.py index 0e58b46..d47544e 100644 --- a/dredd_test_runners/reduce_new_kills/main.py +++ b/dredd_test_runners/reduce_new_kills/main.py @@ -5,6 +5,9 @@ import shutil import stat import sys +import subprocess +import signal +import datetime from dredd_test_runners.common.constants import (DEFAULT_RUNTIME_TIMEOUT, MIN_TIMEOUT_FOR_MUTANT_COMPILATION, @@ -46,13 +49,15 @@ def main(): killed_mutant_to_test_info: Dict[int, Dict] = {} # Figure out all the tests that have 
killed mutants in ways for which reduction is - # actionable. The reason for determining all such tests upfront is that when we reduce one - # such test, we can quickly see whether it kills any of the mutants killed by the other - # tests, avoiding the need to reduce those tests too if so. + # actionable. A reason for determining all such tests upfront is that after we reduce one + # such test, it would be possible to see whether it kills any of the mutants killed by the other + # tests, avoiding the need to reduce those tests too if so. (However, this is not implemented at + # present and it may prove simpler to do all of the reductions and subsequently address + # redundancy.) for test in tests_dir.glob('*'): if not test.is_dir(): continue - if not test.name.startswith("csmith"): + if not test.name.startswith("csmith") and not test.name.startswith("yarpgen"): continue kill_summary: Path = test / "kill_summary.json" if not kill_summary.exists(): @@ -63,33 +68,66 @@ def main(): kill_type: str = mutant_summary['kill_type'] if (kill_type == 'KillStatus.KILL_DIFFERENT_STDOUT' or kill_type == 'KillStatus.KILL_RUNTIME_TIMEOUT' - or kill_type == 'KillStatus.KILL_DIFFERENT_EXIT_CODES'): - # This is an actionable kill: the mutated compiler produces a compilable program - # that runs, but that deviates from the expected result at runtime. + or kill_type == 'KillStatus.KILL_DIFFERENT_EXIT_CODES' + or kill_type == 'KillStatus.KILL_COMPILER_CRASH'): + # Test case reduction may be feasible and useful for this kill. 
killed_mutant_to_test_info[mutant] = mutant_summary reduction_queue: List[int] = list(killed_mutant_to_test_info.keys()) reduction_queue.sort() reductions_dir: Path = work_dir / "reductions" - if not work_dir.exists(): - os.makedirs(reductions_dir) + Path(reductions_dir).mkdir(exist_ok=True) while reduction_queue: mutant_to_reduce = reduction_queue.pop(0) current_reduction_dir: Path = reductions_dir / str(mutant_to_reduce) - if current_reduction_dir.exists(): + try: + current_reduction_dir.mkdir() + except FileExistsError: print(f"Skipping reduction for mutant {mutant_to_reduce} as {current_reduction_dir} already exists.") continue - os.makedirs(current_reduction_dir) - print(f"Preparing to reduce mutant {mutant_to_reduce}. Details: {killed_mutant_to_test_info[mutant_to_reduce]}") + mutant_summary = killed_mutant_to_test_info[mutant_to_reduce] + + print(f"Preparing to reduce mutant {mutant_to_reduce}. Details: {mutant_summary}") + + is_yarpgen_testcase = mutant_summary["killing_test"].startswith("yarpgen") + + if is_yarpgen_testcase: + # inline init.h in func.c + with open(tests_dir / killed_mutant_to_test_info[mutant_to_reduce]['killing_test'] / 'init.h', 'r') as init_f: + init_content = init_f.read() + with open(tests_dir / killed_mutant_to_test_info[mutant_to_reduce]['killing_test'] / 'func.c', 'r') as func_f: + func_content = func_f.read() + combined_file_content = func_content.replace('#include "init.h"', init_content) + + # Add SENTINEL comment to add as seperator btween func.c and driver.c + combined_file_content += "\n// SENTINEL\n" + + # Add content of driver.c + with open(tests_dir / killed_mutant_to_test_info[mutant_to_reduce]['killing_test'] / 'driver.c', 'r') as driver_f: + combined_file_content += driver_f.read() + + # write the combined file into combined.c + with open(current_reduction_dir / 'combined.c', 'w') as combined_f: + combined_f.write(combined_file_content) + program_to_check = "combined.c" + else: + shutil.copy(src=tests_dir / 
killed_mutant_to_test_info[mutant_to_reduce]['killing_test'] / 'prog.c', + dst=current_reduction_dir / 'prog.c') + program_to_check = "prog.c" + + interestingness_test_template_file = \ + "interesting_crash.py.template"\ + if mutant_summary['kill_type'] == 'KillStatus.KILL_COMPILER_CRASH'\ + else "interesting_miscompilation.py.template" interestingness_test_template = jinja2.Environment( loader=jinja2.FileSystemLoader( - searchpath=os.path.dirname(os.path.realpath(__file__)))).get_template("interesting.py.template") + searchpath=os.path.dirname(os.path.realpath(__file__)))).get_template(interestingness_test_template_file) open(current_reduction_dir / 'interesting.py', 'w').write(interestingness_test_template.render( - program_to_check="prog.c", + program_to_check=program_to_check, mutated_compiler_executable=args.mutated_compiler_executable, csmith_root=args.csmith_root, mutation_ids=str(mutant_to_reduce), @@ -97,27 +135,59 @@ def main(): timeout_multiplier_for_mutant_compilation=TIMEOUT_MULTIPLIER_FOR_MUTANT_COMPILATION, min_timeout_for_mutant_execution=MIN_TIMEOUT_FOR_MUTANT_EXECUTION, timeout_multiplier_for_mutant_execution=TIMEOUT_MULTIPLIER_FOR_MUTANT_EXECUTION, - default_runtime_timeout=DEFAULT_RUNTIME_TIMEOUT + default_runtime_timeout=DEFAULT_RUNTIME_TIMEOUT, + csmith_original_warnings_dir=os.path.abspath(current_reduction_dir) )) # Make the interestingness test executable. 
st = os.stat(current_reduction_dir / 'interesting.py') os.chmod(current_reduction_dir / 'interesting.py', st.st_mode | stat.S_IEXEC) - shutil.copy(src=tests_dir / killed_mutant_to_test_info[mutant_to_reduce]['killing_test'] / 'prog.c', - dst=current_reduction_dir / 'prog.c') - # 12 hour timeout - maybe_result: Optional[ProcessResult] = run_process_with_timeout( - cmd=['creduce', 'interesting.py', 'prog.c'], - timeout_seconds=43200, - cwd=current_reduction_dir) - if maybe_result is None: - print(f"Reduction of {mutant_to_reduce} timed out.") - - # TODO: Check for additional kills for the reduced program - # TODO: Emit a summary of the mutants that the reduced program kills - # TODO: Look into potential for automated cleanup of reduced program, e.g. to use standard data types or to - # be better formatted. + # Run creduce with 12 hour timeout and store in logfile + reduction_start_time: datetime.datetime = datetime.datetime.now() + reduction_status = "" + with open(os.path.join(current_reduction_dir, 'reduction_log.txt'), 'wb') as logfile: + try: + creduce_proc = subprocess.Popen(['creduce', 'interesting.py', program_to_check, '--n', '1'], + cwd=current_reduction_dir, stdout=logfile, stderr=logfile, + start_new_session=True) + creduce_proc.wait(timeout=43200) + if creduce_proc.returncode != 0: + print(f"Reduction of {mutant_to_reduce} failed with exit code {creduce_proc.returncode}") + reduction_status = "FAILED" + else: + print(f"Reduction of {mutant_to_reduce} succeeded.") + reduction_status = "SUCCESS" + except subprocess.TimeoutExpired: + print(f"Reduction of {mutant_to_reduce} timed out.") + reduction_status = "TIMEOUT" + os.killpg(os.getpgid(creduce_proc.pid), signal.SIGTERM) + except Exception as exp: + print(f"Reduction of {mutant_to_reduce} failed with an exception: {exp}") + reduction_status = "EXCEPTION" + os.killpg(os.getpgid(creduce_proc.pid), signal.SIGTERM) + reduction_end_time: datetime.datetime = datetime.datetime.now() + + # Split the combined file 
into separate files if necessary + if is_yarpgen_testcase: + with open(current_reduction_dir / 'combined.c', 'r') as combined_f: + combined_file_content = combined_f.read() + seperated_contents = combined_file_content.split("// SENTINEL\n") + if len(seperated_contents) < 2: + # SENTINEL comment not present in combined file + raise Exception("SENTINEL comment not present in combined file.") + for i in range(len(seperated_contents)): + with open(current_reduction_dir / f"file_{i}.c", 'w') as seperated_f: + seperated_f.write(seperated_contents[i]) + os.remove(current_reduction_dir / 'combined.c') + + + # Store reduction information + with open(os.path.join(current_reduction_dir, 'reduction_summary.json'), 'w') as summary_file: + json.dump({"reduction_status": reduction_status, + "reduction_start_time": str(reduction_start_time), + "reduction_end_time": str(reduction_end_time), + }, summary_file) if __name__ == '__main__': diff --git a/dredd_test_runners/yarpgen_runner/main.py b/dredd_test_runners/yarpgen_runner/main.py index e080376..08db7ad 100644 --- a/dredd_test_runners/yarpgen_runner/main.py +++ b/dredd_test_runners/yarpgen_runner/main.py @@ -6,6 +6,7 @@ import random import tempfile import time +import datetime from dredd_test_runners.common.constants import DEFAULT_COMPILATION_TIMEOUT, DEFAULT_RUNTIME_TIMEOUT from dredd_test_runners.common.hash_file import hash_file @@ -265,6 +266,9 @@ def main(): shutil.copy(src=yarpgen_out_dir / "func.c", dst=test_output_directory / "func.c") shutil.copy(src=yarpgen_out_dir / "init.h", dst=test_output_directory / "init.h") + # Record time at which consideration of this test started + analysis_timestamp_start: datetime.datetime = datetime.datetime.now() + + # Load file contents into a list. We go from list to set to list to eliminate duplicates. 
covered_by_this_test: List[int] = list(set([int(line.strip()) for line in open(dredd_covered_mutants_path, 'r').readlines()])) @@ -316,7 +320,9 @@ def main(): print("Writing kill info to file.") with open(mutant_path / "kill_info.json", "w") as outfile: json.dump({"killing_test": yarpgen_test_name, - "kill_type": str(mutant_result)}, outfile) + "kill_type": str(mutant_result), + "kill_timestamp": str(datetime.datetime.now()), + }, outfile) except FileExistsError: print(f"Mutant {mutant} was independently discovered to be killed.") continue @@ -341,12 +347,19 @@ def main(): killed_by_this_test.sort() covered_but_not_killed_by_this_test.sort() already_killed_by_other_tests.sort() + + # Record time at which consideration of this test ended + analysis_timestamp_end: datetime.datetime = datetime.datetime.now() + with open(test_output_directory / "kill_summary.json", "w") as outfile: json.dump({"terminated_early": terminated_early, - "covered_mutants": covered_by_this_test, + "covered_mutants_count": len(covered_by_this_test), "killed_mutants": killed_by_this_test, - "skipped_mutants": already_killed_by_other_tests, - "survived_mutants": covered_but_not_killed_by_this_test}, outfile) + "skipped_mutants_count": len(already_killed_by_other_tests), + "survived_mutants_count": len(covered_but_not_killed_by_this_test), + "analysis_start_time": str(analysis_timestamp_start), + "analysis_end_time": str(analysis_timestamp_end), + }, outfile) if __name__ == '__main__': diff --git a/pyproject.toml b/pyproject.toml index c77ee60..de0f371 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,3 +26,5 @@ llvm-test-suite-runner = "dredd_test_runners.llvm_test_suite_runner.main:main" llvm-regression-tests-runner = "dredd_test_runners.llvm_regression_tests_runner.main:main" analyse-results = "dredd_test_runners.analyse_results.main:main" reduce-new-kills = "dredd_test_runners.reduce_new_kills.main:main" +package-tests = "dredd_test_runners.package_tests.main:main" +historical-check = 
"dredd_test_runners.historical_check.main:main"