From 614d6ad1186104659e8a7c1e9507dc04f58ea4c3 Mon Sep 17 00:00:00 2001 From: Arthur Chan Date: Mon, 28 Nov 2022 21:18:54 +0000 Subject: [PATCH 1/6] Fix wrong checking bug Signed-off-by: Arthur Chan --- oss_fuzz_integration/runner.py | 49 ++++++++++++++++------------------ 1 file changed, 23 insertions(+), 26 deletions(-) diff --git a/oss_fuzz_integration/runner.py b/oss_fuzz_integration/runner.py index 0863c297e..cb3bb1007 100755 --- a/oss_fuzz_integration/runner.py +++ b/oss_fuzz_integration/runner.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python3 # Copyright 2021 Fuzz Introspector Authors # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -60,7 +59,7 @@ def download_full_public_corpus(project_name, target_corpus_dir: None): download_public_corpus(project_name, fuzzer, f"corpus-{project_name}-{fuzzer}.zip") if not target_corpus_dir: - target_corpus_dir = f"{project_name}-corpus" + target_corpus_dir = "mycorpus" if not os.path.isdir(target_corpus_dir): os.mkdir(target_corpus_dir) @@ -156,25 +155,29 @@ def patch_jvm_source_dead_link(server_directory, prefix): source report format and those links will be dead. This patch aim to check all those link and disable them if the link is dead. """ - # Patch dead link in fuzz_report.html (stored in all_functions.js / analysis_1.js) + # Patch dead link in fuzz_report.html (stored in js files) print("Start patching dead link in fuzz_report.html") - files = ["all_functions.js", "analysis_1.js"] - for file in files: - # Read js file - with open(os.path.join(server_directory, file)) as f: - report = f.read() - - # Replace dead link with '#' - links = re.findall(r'href=[\'"]?([^\'" >]+)', report) - links.extend(re.findall(r'[\'"]func_url[\'"]:\ [\'"]?([^\'" >]+)', report)) - for link in links: - if link.startswith(prefix) and not os.path.exists(link[1:].split("#")[0]): - report = report.replace(link, "#") - - # Write result back to js file - with open(os.path.join(server_directory, file), "w+") as f: - f.write(report) + for root, _, files in os.walk(os.path.abspath(server_directory)): + for file in files: + if file.endswith(".js"): + + # Read js file + with open(os.path.join(root, file)) as f: + report = f.read() + + # Replace dead link with '#' + links = re.findall(r'href=[\'"]?([^\'" >]+)', report) + links.extend(re.findall(r'[\'"]func_url[\'"]:\ [\'"]?([^\'" >]+)', report)) + for link in links: + if link.startswith(prefix) and not os.path.exists( + os.path.join(server_directory, link[1:].split("#")[0]) + ): + report = report.replace(link, "#") + + # Write result back to js file + with open(os.path.join(root, file), "w+") as f: + f.write(report) print("Finish patching dead link in fuzz_Report.html") @@ -617,12 +620,6 @@ def get_cmdline_parser() -> argparse.ArgumentParser: "project", help="name of project" ) - download_corpus_parser.add_argument( - "--corpus-dir", - type=str, - help="directory with corpus for the project", - default=None - ) return parser if __name__ == "__main__": @@ -655,4 +652,4 @@ def get_cmdline_parser() -> argparse.ArgumentParser: not args.no_coverage ) elif args.command == "download-corpus": - download_full_public_corpus(args.project, args.corpus_dir) + download_full_public_corpus(args.project) From e8643545ae91ef25cb7b8e398d27c83276a51e6e Mon Sep 17 00:00:00 2001 From: Arthur Chan Date: Mon, 28 Nov 2022 21:21:30 +0000 Subject: [PATCH 2/6] Fix bug in handling coverage links for optimal functions Signed-off-by: Arthur Chan --- .../analyses/optimal_targets.py | 12 ++- .../datatypes/fuzzer_profile.py | 84 +------------------ src/fuzz_introspector/utils.py | 53 ++++++++++++ 3 files changed, 64 insertions(+), 85 deletions(-) diff --git a/src/fuzz_introspector/analyses/optimal_targets.py b/src/fuzz_introspector/analyses/optimal_targets.py index 614c7b0e2..7873763ff 100644 --- a/src/fuzz_introspector/analyses/optimal_targets.py +++ b/src/fuzz_introspector/analyses/optimal_targets.py @@ -85,7 +85,8 @@ def analysis_func( optimal_target_functions, toc_list, tables, - coverage_url + coverage_url, + profiles[0].target_lang ) # Create section for how the state of the project will be if @@ -224,7 +225,8 @@ def get_optimal_target_section( optimal_target_functions: List[function_profile.FunctionProfile], toc_list: List[Tuple[str, str, int]], tables: List[str], - coverage_url: str + coverage_url: str, + target_lang: str = 'c-cpp' ) -> str: # Table with details about optimal target functions html_string = html_helpers.html_add_header_with_link( @@ -257,10 +259,12 @@ def get_optimal_target_section( ] ) for fd in optimal_target_functions: - func_cov_url = "%s%s.html#L%d" % ( + func_cov_url = utils.resolve_coverage_link ( coverage_url, fd.function_source_file, - fd.function_linenumber + fd.function_linenumber, + fd.function_name, + target_lang ) html_func_row = ( f"" diff --git a/src/fuzz_introspector/datatypes/fuzzer_profile.py b/src/fuzz_introspector/datatypes/fuzzer_profile.py index a90dbb081..20ac2122e 100644 --- a/src/fuzz_introspector/datatypes/fuzzer_profile.py +++ b/src/fuzz_introspector/datatypes/fuzzer_profile.py @@ -135,30 +135,13 @@ def resolve_coverage_link( function_name: str ) -> str: """Resolves a link to a coverage report.""" - if self.target_lang == "c-cpp": - return self._resolve_c_cpp_coverage_link( - cov_url, - source_file, - lineno, - function_name - ) - elif self.target_lang == "python": - return self._resolve_python_coverage_link( + return utils.resolve_coverage_link( cov_url, source_file, lineno, - function_name + function_name, + self.target_lang ) - elif self.target_lang == "jvm": - return self._resolve_jvm_coverage_link( - cov_url, - source_file, - lineno, - function_name - ) - else: - logger.info("Could not find any html_status.json file") - return "#" def refine_paths(self, basefolder: str) -> None: """Iterate over source files in the calltree and file_targets and remove @@ -525,64 +508,3 @@ def _is_func_name_missing_normalisation(self, func_name: str) -> bool: if split_name[-1].isnumeric(): return True return False - - def _resolve_c_cpp_coverage_link( - self, - cov_url: str, - source_file: str, - lineno: int, - function_name: str - ) -> str: - """Resolves link to HTML coverage report for C/CPP targets""" - return cov_url + source_file + ".html#L" + str(lineno) - - def _resolve_python_coverage_link( - self, - cov_url: str, - source_file: str, - lineno: int, - function_name: str - ) -> str: - """Resolves link to HTML coverage report for Python targets""" - # Temporarily for debugging purposes. TODO: David remove this later - # Find the html_status.json file. This is a file generated by the Python - # coverate utility and contains mappings from source to html file. We - # need this mapping in order to create links from the data extracted - # during AST analysis, as there we only have the source code. - html_summaries = utils.get_all_files_in_tree_with_regex(".", ".*html_status.json$") - logger.debug(str(html_summaries)) - if len(html_summaries) > 0: - html_idx = html_summaries[0] - with open(html_idx, "r") as jf: - data = json.load(jf) - for fl in data['files']: - found_target = utils.approximate_python_coverage_files( - function_name, - data['files'][fl]['index']['relative_filename'], - ) - if found_target: - return cov_url + "/" + fl + ".html" + "#t" + str(lineno) - else: - logger.info("Could not find any html_status.json file") - return "#" - - def _resolve_jvm_coverage_link( - self, - cov_url: str, - source_file: str, - lineno: int, - function_name: str - ) -> str: - """Resolves link to HTML coverage report for JVM targets""" - # Handle source class for jvm - if ("." in source_file): - # Source file has package, change package.class to package/class - source_file = os.sep.join(source_file.rsplit(".", 1)) - else: - # Source file has no package, add in default package - source_file = os.path.join("default", source_file) - - # Handle subclass definition in the same source file - source_file = source_file.split("$")[0] - - return cov_url + os.sep + source_file + ".java.html#L" + str(lineno) diff --git a/src/fuzz_introspector/utils.py b/src/fuzz_introspector/utils.py index 4b72720e3..a67d9c0c3 100644 --- a/src/fuzz_introspector/utils.py +++ b/src/fuzz_introspector/utils.py @@ -268,3 +268,56 @@ def load_func_names( continue loaded.append(demangle_cpp_func(reached)) return loaded + + +def resolve_coverage_link( + self, + cov_url: str, + source_file: str, + lineno: int, + function_name: str, + target_lang: str +) -> str: + """Resolves link to HTML coverage report""" + if (target_lang == "c-cpp": + return cov_url + source_file + ".html#L" + str(lineno) + elif (target_lang == "python": + """Resolves link to HTML coverage report for Python targets""" + # Temporarily for debugging purposes. TODO: David remove this later + # Find the html_status.json file. This is a file generated by the Python + # coverate utility and contains mappings from source to html file. We + # need this mapping in order to create links from the data extracted + # during AST analysis, as there we only have the source code. + html_summaries = utils.get_all_files_in_tree_with_regex(".", ".*html_status.json$") + logger.debug(str(html_summaries)) + if len(html_summaries) > 0: + html_idx = html_summaries[0] + with open(html_idx, "r") as jf: + data = json.load(jf) + for fl in data['files']: + found_target = utils.approximate_python_coverage_files( + function_name, + data['files'][fl]['index']['relative_filename'], + ) + if found_target: + return cov_url + "/" + fl + ".html" + "#t" + str(lineno) + else: + logger.info("Could not find any html_status.json file") + return "#" + elif (target_lang == "jvm"): + """Resolves link to HTML coverage report for JVM targets""" + # Handle source class for jvm + if ("." in source_file): + # Source file has package, change package.class to package/class + source_file = os.sep.join(source_file.rsplit(".", 1)) + else: + # Source file has no package, add in default package + source_file = os.path.join("default", source_file) + + # Handle subclass definition in the same source file + source_file = source_file.split("$")[0] + + return cov_url + os.sep + source_file + ".java.html#L" + str(lineno) + else: + logger.info("Unsupported language for coverage link resolve") + return "#" From 9a328cb0f893e8c2fc3ea335fc36a25f295c967f Mon Sep 17 00:00:00 2001 From: Arthur Chan Date: Mon, 28 Nov 2022 21:36:40 +0000 Subject: [PATCH 3/6] Fix bug Signed-off-by: Arthur Chan --- src/fuzz_introspector/utils.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/fuzz_introspector/utils.py b/src/fuzz_introspector/utils.py index a67d9c0c3..da84f9341 100644 --- a/src/fuzz_introspector/utils.py +++ b/src/fuzz_introspector/utils.py @@ -271,7 +271,6 @@ def load_func_names( def resolve_coverage_link( - self, cov_url: str, source_file: str, lineno: int, @@ -279,23 +278,23 @@ def resolve_coverage_link( target_lang: str ) -> str: """Resolves link to HTML coverage report""" - if (target_lang == "c-cpp": + if (target_lang == "c-cpp"): return cov_url + source_file + ".html#L" + str(lineno) - elif (target_lang == "python": + elif (target_lang == "python"): """Resolves link to HTML coverage report for Python targets""" # Temporarily for debugging purposes. TODO: David remove this later # Find the html_status.json file. This is a file generated by the Python # coverate utility and contains mappings from source to html file. We # need this mapping in order to create links from the data extracted # during AST analysis, as there we only have the source code. - html_summaries = utils.get_all_files_in_tree_with_regex(".", ".*html_status.json$") + html_summaries = get_all_files_in_tree_with_regex(".", ".*html_status.json$") logger.debug(str(html_summaries)) if len(html_summaries) > 0: html_idx = html_summaries[0] with open(html_idx, "r") as jf: data = json.load(jf) for fl in data['files']: - found_target = utils.approximate_python_coverage_files( + found_target = approximate_python_coverage_files( function_name, data['files'][fl]['index']['relative_filename'], ) From f86059cb0e25ece8540a65b5afdecea0945b42b1 Mon Sep 17 00:00:00 2001 From: Arthur Chan Date: Mon, 28 Nov 2022 21:53:22 +0000 Subject: [PATCH 4/6] Fix formatting Signed-off-by: Arthur Chan --- src/fuzz_introspector/analyses/optimal_targets.py | 2 +- src/fuzz_introspector/datatypes/fuzzer_profile.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/src/fuzz_introspector/analyses/optimal_targets.py b/src/fuzz_introspector/analyses/optimal_targets.py index 7873763ff..4bc7f8412 100644 --- a/src/fuzz_introspector/analyses/optimal_targets.py +++ b/src/fuzz_introspector/analyses/optimal_targets.py @@ -259,7 +259,7 @@ def get_optimal_target_section( ] ) for fd in optimal_target_functions: - func_cov_url = utils.resolve_coverage_link ( + func_cov_url = utils.resolve_coverage_link( coverage_url, fd.function_source_file, fd.function_linenumber, diff --git a/src/fuzz_introspector/datatypes/fuzzer_profile.py b/src/fuzz_introspector/datatypes/fuzzer_profile.py index 20ac2122e..074f41bf0 100644 --- a/src/fuzz_introspector/datatypes/fuzzer_profile.py +++ b/src/fuzz_introspector/datatypes/fuzzer_profile.py @@ -14,7 +14,6 @@ """Fuzzer profile""" import os -import json import logging from typing import ( From 696f318ef93d32e1a32304704f67c226721577e7 Mon Sep 17 00:00:00 2001 From: Arthur Chan Date: Tue, 29 Nov 2022 01:15:29 +0000 Subject: [PATCH 5/6] Merge changes from PR #638 #639 Signed-off-by: Arthur Chan --- oss_fuzz_integration/runner.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/oss_fuzz_integration/runner.py b/oss_fuzz_integration/runner.py index cb3bb1007..c153d0f2b 100755 --- a/oss_fuzz_integration/runner.py +++ b/oss_fuzz_integration/runner.py @@ -1,3 +1,4 @@ +#!/usr/bin/env python3 # Copyright 2021 Fuzz Introspector Authors # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -59,7 +60,7 @@ def download_full_public_corpus(project_name, target_corpus_dir: None): download_public_corpus(project_name, fuzzer, f"corpus-{project_name}-{fuzzer}.zip") if not target_corpus_dir: - target_corpus_dir = "mycorpus" + target_corpus_dir = f"{project_name}-corpus" if not os.path.isdir(target_corpus_dir): os.mkdir(target_corpus_dir) @@ -620,6 +621,12 @@ def get_cmdline_parser() -> argparse.ArgumentParser: "project", help="name of project" ) + download_corpus_parser.add_argument( + "--corpus-dir", + type=str, + help="directory with corpus for the project", + default=None + ) return parser if __name__ == "__main__": @@ -652,4 +659,4 @@ def get_cmdline_parser() -> argparse.ArgumentParser: not args.no_coverage ) elif args.command == "download-corpus": - download_full_public_corpus(args.project) + download_full_public_corpus(args.project, args.corpus_dir) From 68b145d06d6b6e3719d6b9c3310119a242d69a2f Mon Sep 17 00:00:00 2001 From: Arthur Chan Date: Tue, 29 Nov 2022 01:18:05 +0000 Subject: [PATCH 6/6] Fix formatting Signed-off-by: Arthur Chan --- src/fuzz_introspector/datatypes/fuzzer_profile.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/fuzz_introspector/datatypes/fuzzer_profile.py b/src/fuzz_introspector/datatypes/fuzzer_profile.py index 074f41bf0..530cc242e 100644 --- a/src/fuzz_introspector/datatypes/fuzzer_profile.py +++ b/src/fuzz_introspector/datatypes/fuzzer_profile.py @@ -135,12 +135,12 @@ def resolve_coverage_link( ) -> str: """Resolves a link to a coverage report.""" return utils.resolve_coverage_link( - cov_url, - source_file, - lineno, - function_name, - self.target_lang - ) + cov_url, + source_file, + lineno, + function_name, + self.target_lang + ) def refine_paths(self, basefolder: str) -> None: """Iterate over source files in the calltree and file_targets and remove