From 58b7f1233324530ee701c0bdac7f1682e6f94ee6 Mon Sep 17 00:00:00 2001 From: Arthur Chan Date: Wed, 16 Nov 2022 16:46:25 +0000 Subject: [PATCH 01/15] Create dummy handler for jvm coverage report Signed-off-by: Arthur Chan --- src/fuzz_introspector/code_coverage.py | 25 ++++++++++++++++++ .../datatypes/fuzzer_profile.py | 26 ++++++++++++++----- 2 files changed, 45 insertions(+), 6 deletions(-) diff --git a/src/fuzz_introspector/code_coverage.py b/src/fuzz_introspector/code_coverage.py index e7bf33530..aa476cb2a 100644 --- a/src/fuzz_introspector/code_coverage.py +++ b/src/fuzz_introspector/code_coverage.py @@ -502,6 +502,31 @@ def load_python_json_coverage( return cp +def load_jvm_coverage( + target_dir: str, + target_name: Optional[str] = None +) -> CoverageProfile: + """ + Scans a directory to read one or more coverage reports, and returns a CoverageProfile + Parses output from "llvm-cov show", e.g. + llvm-cov show -instr-profile=$profdata_file -object=$target \ + -line-coverage-gt=0 $shared_libraries $LLVM_COV_COMMON_ARGS > \ + ${FUZZER_STATS_DIR}/$target.covreport + This is used to parse JVM coverage. + The function supports loading multiple and individual coverage reports. + This is needed because finding coverage on a per-fuzzer basis requires + correlating binary files to a specific introspection profile from compile time. + However, files could be moved around, renamed, and so on. + As such, this function accepts an arugment "target_name" which is used to + target specific coverage profiles. However, if no coverage profile matches + that given name then the function will find *all* coverage reports it can and + use all of them. + """ + + cp = CoverageProfile() + + return cp + if __name__ == "__main__": logging.basicConfig() diff --git a/src/fuzz_introspector/datatypes/fuzzer_profile.py b/src/fuzz_introspector/datatypes/fuzzer_profile.py index 15ba658ac..c6b5f6ec3 100644 --- a/src/fuzz_introspector/datatypes/fuzzer_profile.py +++ b/src/fuzz_introspector/datatypes/fuzzer_profile.py @@ -101,8 +101,8 @@ def identifier(self): return os.path.basename(self.fuzzer_source_file).replace(".py", "") elif self._target_lang == "jvm": - # TODO Handle jvm fuzzer source file - pass + # Class name is used for jvm identifier + return os.path.basename(self.fuzzer_source_file) return self.fuzzer_source_file @@ -150,8 +150,12 @@ def resolve_coverage_link( function_name ) elif self.target_lang == "jvm": - # TODO Add coverage report for JVM - pass + return self._resolve_jvm_coverage_link( + cov_url, + source_file, + lineno, + function_name + ) else: logger.info("Could not find any html_status.json file") return "#" @@ -440,8 +444,7 @@ def _load_coverage(self, target_folder: str) -> None: self.all_class_functions ) elif self.target_lang == "jvm": - # TODO Add JVM coverage loading support - self.coverage = code_coverage.load_llvm_coverage( + self.coverage = code_coverage.load_jvm_coverage( target_folder, self.identifier ) @@ -556,3 +559,14 @@ def _resolve_python_coverage_link( else: logger.info("Could not find any html_status.json file") return "#" + + def _resolve_jvm_coverage_link( + self, + cov_url: str, + source_file: str, + lineno: int, + function_name: str + ) -> str: + """Resolves link to HTML coverage report for JVM targets""" + # TODO Add in cg to coverage report link for JVM + return "#" From af5dd39032645640e22fa60851401b2dba8845a8 Mon Sep 17 00:00:00 2001 From: Arthur Chan Date: Wed, 16 Nov 2022 17:16:59 +0000 Subject: [PATCH 02/15] Add jvm resolve coverage link logic Signed-off-by: Arthur Chan --- .../datatypes/project_profile.py | 23 +++++++++---------- 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/src/fuzz_introspector/datatypes/project_profile.py b/src/fuzz_introspector/datatypes/project_profile.py index c28fb14f8..1d4a32d68 100644 --- a/src/fuzz_introspector/datatypes/project_profile.py +++ b/src/fuzz_introspector/datatypes/project_profile.py @@ -181,27 +181,26 @@ def resolve_coverage_report_link( func_name ): - if self.target_lang == "c-cpp": - return "%s%s.html#L%d" % ( + if self.target_lang == "python": + return self.profiles[0].resolve_coverage_link( coverage_url, function_source_file, - lineno + lineno, + func_name ) - elif self.target_lang == "python": + elif self.target_lang == "jvm": return self.profiles[0].resolve_coverage_link( coverage_url, function_source_file, lineno, func_name ) - elif self.target_lang == "jvm": - # TODO Add coverage details for jvm - pass - return "%s%s.html#L%d" % ( - coverage_url, - function_source_file, - lineno - ) + else: + return "%s%s.html#L%d" % ( + coverage_url, + function_source_file, + lineno + ) @property def target_lang(self): From 4a85e7279ecc16d1ac1be8a154a5b7747524563c Mon Sep 17 00:00:00 2001 From: Arthur Chan Date: Wed, 16 Nov 2022 18:04:11 +0000 Subject: [PATCH 03/15] Connect calltree to jacoco coverage report link Signed-off-by: Arthur Chan --- src/fuzz_introspector/datatypes/fuzzer_profile.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/fuzz_introspector/datatypes/fuzzer_profile.py b/src/fuzz_introspector/datatypes/fuzzer_profile.py index c6b5f6ec3..a956c0b20 100644 --- a/src/fuzz_introspector/datatypes/fuzzer_profile.py +++ b/src/fuzz_introspector/datatypes/fuzzer_profile.py @@ -568,5 +568,12 @@ def _resolve_jvm_coverage_link( function_name: str ) -> str: """Resolves link to HTML coverage report for JVM targets""" - # TODO Add in cg to coverage report link for JVM - return "#" + # Handle source class for jvm + if ("." in source_file): + # Source file has package, change all . to path separator + source_file = source_file.replace(".", os.sep) + else: + # Source fil has no package, add in default package + source_file = os.path.join("default", source_file) + + return cov_url + os.sep + source_file + ".html#L" + str(lineno) From 5898a5fe479727a00936e2e6f4d7e58b6d22ba84 Mon Sep 17 00:00:00 2001 From: Arthur Chan Date: Wed, 16 Nov 2022 18:09:12 +0000 Subject: [PATCH 04/15] Fix formatting Signed-off-by: Arthur Chan --- src/fuzz_introspector/code_coverage.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/fuzz_introspector/code_coverage.py b/src/fuzz_introspector/code_coverage.py index aa476cb2a..abcf26c22 100644 --- a/src/fuzz_introspector/code_coverage.py +++ b/src/fuzz_introspector/code_coverage.py @@ -502,6 +502,7 @@ def load_python_json_coverage( return cp + def load_jvm_coverage( target_dir: str, target_name: Optional[str] = None From 11d91ac9173484e1dbb1bac74876b2929e2b4422 Mon Sep 17 00:00:00 2001 From: Arthur Chan Date: Thu, 17 Nov 2022 14:33:55 +0000 Subject: [PATCH 05/15] Retrieving coverage from jacoco.xml Signed-off-by: Arthur Chan --- src/fuzz_introspector/code_coverage.py | 48 +++++++++++++++++--------- 1 file changed, 31 insertions(+), 17 deletions(-) diff --git a/src/fuzz_introspector/code_coverage.py b/src/fuzz_introspector/code_coverage.py index abcf26c22..310c4af57 100644 --- a/src/fuzz_introspector/code_coverage.py +++ b/src/fuzz_introspector/code_coverage.py @@ -507,24 +507,38 @@ def load_jvm_coverage( target_dir: str, target_name: Optional[str] = None ) -> CoverageProfile: - """ - Scans a directory to read one or more coverage reports, and returns a CoverageProfile - Parses output from "llvm-cov show", e.g. - llvm-cov show -instr-profile=$profdata_file -object=$target \ - -line-coverage-gt=0 $shared_libraries $LLVM_COV_COMMON_ARGS > \ - ${FUZZER_STATS_DIR}/$target.covreport - This is used to parse JVM coverage. - The function supports loading multiple and individual coverage reports. - This is needed because finding coverage on a per-fuzzer basis requires - correlating binary files to a specific introspection profile from compile time. - However, files could be moved around, renamed, and so on. - As such, this function accepts an arugment "target_name" which is used to - target specific coverage profiles. However, if no coverage profile matches - that given name then the function will find *all* coverage reports it can and - use all of them. - """ - + import xml.etree.ElementTree as ET cp = CoverageProfile() + cp.set_type("file") + + coverage_reports = utils.get_all_files_in_tree_with_regex(target_dir, "jacoco.xml") + logger.info(f"FOUND XML COVERAGE FILES: {str(coverage_reports)}") + + if len(coverage_reports) > 0: + xml_file = coverage_reports[0] + else: + logger.info("Found no coverage files") + return cp + + cp.coverage_files.append(xml_file) + xml_tree = ET.parse(xml_file) + root = xml_tree.getroot() + + for package in root.findall('package'): + for cl in package.findall('sourcefile'): + cov_entry = cl.attrib['name'].replace(".java", "") + executed_lines = [] + missing_lines = [] + for line in cl.findall('line'): + if line.attrib['ci'] > "0": + executed_lines.append(line.attrib['nr']) + else: + missing_lines.append(line.attrib['nr']) + + cp.file_map[cov_entry] = executed_lines + cp.dual_file_map[cov_entry] = dict() + cp.dual_file_map[cov_entry]['executed_lines'] = executed_lines + cp.dual_file_map[cov_entry]['missing_lines'] = missing_lines return cp From b2bda5994f188ee6afb59d1abdabe5e116ee2f11 Mon Sep 17 00:00:00 2001 From: Arthur Chan Date: Thu, 17 Nov 2022 16:01:37 +0000 Subject: [PATCH 06/15] Fix extraction of jvm xml handling Signed-off-by: Arthur Chan --- src/fuzz_introspector/code_coverage.py | 78 +++++++++++++++++++++++++- 1 file changed, 77 insertions(+), 1 deletion(-) diff --git a/src/fuzz_introspector/code_coverage.py b/src/fuzz_introspector/code_coverage.py index 310c4af57..2e25c29e1 100644 --- a/src/fuzz_introspector/code_coverage.py +++ b/src/fuzz_introspector/code_coverage.py @@ -168,6 +168,7 @@ def get_hit_details(self, funcname: str) -> List[Tuple[int, int]]: if fuzz_key is None or fuzz_key not in self.covmap: return [] + return self.covmap[fuzz_key] def _python_ast_funcname_to_cov_file( @@ -282,6 +283,79 @@ def correlate_python_functions_with_coverage( self.covmap[fname].append((non_exec_line, 0)) return + def correlate_jvm_method_with_coverage( + self, + function_list, + ) -> None: + logger.debug("Correlating JVM") + + file_and_function_mappings: Dict[str, List[Tuple[str, int]]] = dict() + for func_key in function_list: + func = function_list[func_key] + function_name = func.function_name + function_line = func.function_linenumber + class_name = func.function_source_file + + logger.debug(f"Correlated init: {class_name} ---- {function_name} ---- {function_line}") + + if class_name not in self.file_map: + logger.debug("Fail to find matching class") + continue + + if class_name not in file_and_function_mappings: + file_and_function_mappings[class_name] = [] + + file_and_function_mappings[class_name].append( + (function_name, function_line) + ) + + logger.debug("Geting function start and end line") + function_internals: Dict[str, List[Tuple[str, int, int]]] = dict() + for cov_file, function_specs in file_and_function_mappings.items(): + # Sort by line number + sorted_func_specs = list(sorted(function_specs, key=lambda x: x[1])) + + function_internals[cov_file] = [] + for i in range(len(sorted_func_specs)): + fname, fstart = sorted_func_specs[i] + # Get next function lineno to identify boundary + if i < len(sorted_func_specs) - 1: + fnext_name, fnext_start = sorted_func_specs[i + 1] + function_internals[cov_file].append( + (fname, fstart, fnext_start - 1) + ) + else: + # Last function identified by end lineno being -1 + function_internals[cov_file].append((fname, fstart, -1)) + + # Map the source codes of each line with coverage information. + # Store the result in covmap to be compatible with other languages. + for filename in function_internals: + logger.debug(f"Filename: {filename}") + for fname, fstart, fend in function_internals[filename]: + logger.debug(f"--- {fname} ::: {fstart} ::: {fend}") + if fname not in self.covmap: + # Fail safe + self.covmap[fname] = [] + + # If we have the file in dual_file_map identify the + # executed vs non-executed lines and store in covmap. + if filename not in self.dual_file_map: + continue + + # Create the covmap + for exec_line in self.dual_file_map[filename]['executed_lines']: + if int(exec_line) > fstart and int(exec_line) < fend: + logger.info(f"E: {exec_line}") + self.covmap[fname].append((exec_line, 1000)) + for non_exec_line in self.dual_file_map[filename]['missing_lines']: + if int(non_exec_line) > fstart and int(non_exec_line) < fend: + logger.info(f"N: {non_exec_line}") + self.covmap[fname].append((non_exec_line, 0)) + + return + + def get_hit_summary( self, funcname: str @@ -526,7 +600,9 @@ def load_jvm_coverage( for package in root.findall('package'): for cl in package.findall('sourcefile'): - cov_entry = cl.attrib['name'].replace(".java", "") + cov_entry = "%s/%s" % (package.attrib['name'],cl.attrib['name']) + cov_entry = cov_entry.replace("/",".") + cov_entry = cov_entry.replace(".java", "") executed_lines = [] missing_lines = [] for line in cl.findall('line'): From 3b14aac4e5c65ac693c588868928654ce14158f0 Mon Sep 17 00:00:00 2001 From: Arthur Chan Date: Thu, 17 Nov 2022 16:02:37 +0000 Subject: [PATCH 07/15] Add additional extraction logic for jvm coverage report Signed-off-by: Arthur Chan --- src/fuzz_introspector/datatypes/fuzzer_profile.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/fuzz_introspector/datatypes/fuzzer_profile.py b/src/fuzz_introspector/datatypes/fuzzer_profile.py index a956c0b20..0c4fd0000 100644 --- a/src/fuzz_introspector/datatypes/fuzzer_profile.py +++ b/src/fuzz_introspector/datatypes/fuzzer_profile.py @@ -448,6 +448,10 @@ def _load_coverage(self, target_folder: str) -> None: target_folder, self.identifier ) + if self.coverage is not None: + self.coverage.correlate_jvm_method_with_coverage( + self.all_class_functions + ) else: raise DataLoaderError( "The profile target has no coverage loading support" From 388763b10d9ec8361369cb9db146eaf204dca89b Mon Sep 17 00:00:00 2001 From: Arthur Chan Date: Mon, 21 Nov 2022 15:08:00 +0000 Subject: [PATCH 08/15] Fix bug for int/string type conversion Fix bug for file long function handling Signed-off-by: Arthur Chan --- src/fuzz_introspector/analysis.py | 14 ++++++-------- src/fuzz_introspector/code_coverage.py | 22 ++++++++++++---------- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/src/fuzz_introspector/analysis.py b/src/fuzz_introspector/analysis.py index 6e11408d9..24624a509 100644 --- a/src/fuzz_introspector/analysis.py +++ b/src/fuzz_introspector/analysis.py @@ -183,14 +183,12 @@ def get_node_coverage_hitcount( # As this is the first node ensure it is indeed the entrypoint. # The difference is this node has node "parent" or prior nodes. - # Arthur: Not true anymore for cpp or jvm with multiple yaml file - # TODO Relaxing this constraint temporary, fixed later - # if not profile.func_is_entrypoint(demangled_name): - # raise AnalysisError( - # "First node in calltree seems to be non-fuzzer function" - # ) - + if not profile.func_is_entrypoint(demangled_name): + raise AnalysisError( + "First node in calltree seems to be non-fuzzer function" + ) coverage_data = profile.coverage.get_hit_details(demangled_name) + if len(coverage_data) == 0: logger.error("There is no coverage data (not even all negative).") node.cov_parent = "EP" @@ -224,7 +222,6 @@ def get_node_coverage_hitcount( if ih: node_hitcount = 200 elif profile.target_lang == "jvm": - # TODO Handle for jvm added later coverage_data = profile.coverage.get_hit_details( callstack_get_parent(node, callstack) ) @@ -238,6 +235,7 @@ def get_node_coverage_hitcount( raise AnalysisError( "A node should either be the first or it must have a parent" ) + return node_hitcount diff --git a/src/fuzz_introspector/code_coverage.py b/src/fuzz_introspector/code_coverage.py index 2e25c29e1..fed5748bf 100644 --- a/src/fuzz_introspector/code_coverage.py +++ b/src/fuzz_introspector/code_coverage.py @@ -264,6 +264,7 @@ def correlate_python_functions_with_coverage( logger.debug(f"Filename: {filename}") for fname, fstart, fend in function_internals[filename]: logger.debug(f"--- {fname} ::: {fstart} ::: {fend}") + if fname not in self.covmap: self.covmap[fname] = [] @@ -290,12 +291,10 @@ def correlate_jvm_method_with_coverage( logger.debug("Correlating JVM") file_and_function_mappings: Dict[str, List[Tuple[str, int]]] = dict() - for func_key in function_list: - func = function_list[func_key] + for (func_key, func) in function_list.items(): function_name = func.function_name function_line = func.function_linenumber class_name = func.function_source_file - logger.debug(f"Correlated init: {class_name} ---- {function_name} ---- {function_line}") if class_name not in self.file_map: @@ -318,6 +317,7 @@ def correlate_jvm_method_with_coverage( function_internals[cov_file] = [] for i in range(len(sorted_func_specs)): fname, fstart = sorted_func_specs[i] + # Get next function lineno to identify boundary if i < len(sorted_func_specs) - 1: fnext_name, fnext_start = sorted_func_specs[i + 1] @@ -345,12 +345,12 @@ def correlate_jvm_method_with_coverage( # Create the covmap for exec_line in self.dual_file_map[filename]['executed_lines']: - if int(exec_line) > fstart and int(exec_line) < fend: - logger.info(f"E: {exec_line}") + if (exec_line > fstart) and (exec_line < fend or fend == -1): + logger.debug(f"E: {exec_line}") self.covmap[fname].append((exec_line, 1000)) for non_exec_line in self.dual_file_map[filename]['missing_lines']: - if int(non_exec_line) > fstart and int(non_exec_line) < fend: - logger.info(f"N: {non_exec_line}") + if (non_exec_line > fstart) and (non_exec_line < fend or fend == -1): + logger.debug(f"N: {non_exec_line}") self.covmap[fname].append((non_exec_line, 0)) return @@ -600,16 +600,18 @@ def load_jvm_coverage( for package in root.findall('package'): for cl in package.findall('sourcefile'): - cov_entry = "%s/%s" % (package.attrib['name'],cl.attrib['name']) + cov_entry = cl.attrib['name'] + if package.attrib['name']: + cov_entry = "%s/%s" % (package.attrib['name'], cov_entry) cov_entry = cov_entry.replace("/",".") cov_entry = cov_entry.replace(".java", "") executed_lines = [] missing_lines = [] for line in cl.findall('line'): if line.attrib['ci'] > "0": - executed_lines.append(line.attrib['nr']) + executed_lines.append(int(line.attrib['nr'])) else: - missing_lines.append(line.attrib['nr']) + missing_lines.append(int(line.attrib['nr'])) cp.file_map[cov_entry] = executed_lines cp.dual_file_map[cov_entry] = dict() From b8fadd1d137425f1104cb58b95665fa7e6a50601 Mon Sep 17 00:00:00 2001 From: Arthur Chan Date: Mon, 21 Nov 2022 15:47:45 +0000 Subject: [PATCH 09/15] Fix divdided by zero bug Signed-off-by: Arthur Chan --- src/fuzz_introspector/data_loader.py | 10 ++++++++-- src/fuzz_introspector/datatypes/fuzzer_profile.py | 8 +++++--- src/fuzz_introspector/datatypes/project_profile.py | 5 ++++- 3 files changed, 17 insertions(+), 6 deletions(-) diff --git a/src/fuzz_introspector/data_loader.py b/src/fuzz_introspector/data_loader.py index 7c1ae2a41..ad15fc420 100644 --- a/src/fuzz_introspector/data_loader.py +++ b/src/fuzz_introspector/data_loader.py @@ -95,7 +95,10 @@ def add_func_to_reached_and_clone( # Update hitcount of all functions reached by the function for func_name in func_to_add.functions_reached: if func_name not in merged_profile.all_functions: - logger.error(f"Mismatched function name: {func_name}") + if merged_profile_old.profiles[0].target_lang == "jvm": + logger.debug(f"{func_name} not provided within classpath") + else: + logger.error(f"Mismatched function name: {func_name}") continue f = merged_profile.all_functions[func_name] f.hitcount += 1 @@ -110,7 +113,10 @@ def add_func_to_reached_and_clone( uncovered_cc = 0 for reached_func_name in f_profile.functions_reached: if reached_func_name not in merged_profile.all_functions: - logger.error(f"Mismatched function name: {reached_func_name}") + if merged_profile_old.profiles[0].target_lang == "jvm": + logger.debug(f"{reached_func_name} not provided within classpath") + else: + logger.error(f"Mismatched function name: {reached_func_name}") continue f_reached = merged_profile.all_functions[reached_func_name] cc += f_reached.cyclomatic_complexity diff --git a/src/fuzz_introspector/datatypes/fuzzer_profile.py b/src/fuzz_introspector/datatypes/fuzzer_profile.py index 0c4fd0000..e03041d60 100644 --- a/src/fuzz_introspector/datatypes/fuzzer_profile.py +++ b/src/fuzz_introspector/datatypes/fuzzer_profile.py @@ -368,9 +368,11 @@ def get_cov_metrics( total_func_lines, hit_lines = self.coverage.get_hit_summary(funcname) if total_func_lines is None or hit_lines is None: return None, None, None - - hit_percentage = (hit_lines / total_func_lines) * 100.0 - return total_func_lines, hit_lines, hit_percentage + if total_func_lines == 0: + return 0, 0, 0 + else: + hit_percentage = (hit_lines / total_func_lines) * 100.0 + return total_func_lines, hit_lines, hit_percentage except Exception: return None, None, None diff --git a/src/fuzz_introspector/datatypes/project_profile.py b/src/fuzz_introspector/datatypes/project_profile.py index 1d4a32d68..b2a2e0a74 100644 --- a/src/fuzz_introspector/datatypes/project_profile.py +++ b/src/fuzz_introspector/datatypes/project_profile.py @@ -87,7 +87,10 @@ def __init__(self, profiles: List[fuzzer_profile.FuzzerProfile]): for reached_func_name in fp_obj.functions_reached: if reached_func_name not in self.all_functions: - logger.error(f"Mismatched function name: {reached_func_name}") + if profile.target_lang == "jvm": + logger.debug(f"{reached_func_name} not provided within classpath") + else: + logger.error(f"Mismatched function name: {reached_func_name}") continue reached_func_obj = self.all_functions[reached_func_name] reached_func_obj.incoming_references.append(fp_obj.function_name) From 58ad832458a69d2d11dd75b609a10f726207a851 Mon Sep 17 00:00:00 2001 From: Arthur Chan Date: Mon, 21 Nov 2022 16:56:28 +0000 Subject: [PATCH 10/15] Fix type conversion bug and formatting Signed-off-by: Arthur Chan --- src/fuzz_introspector/code_coverage.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/fuzz_introspector/code_coverage.py b/src/fuzz_introspector/code_coverage.py index fed5748bf..2a4962f0d 100644 --- a/src/fuzz_introspector/code_coverage.py +++ b/src/fuzz_introspector/code_coverage.py @@ -355,7 +355,6 @@ def correlate_jvm_method_with_coverage( return - def get_hit_summary( self, funcname: str @@ -603,15 +602,15 @@ def load_jvm_coverage( cov_entry = cl.attrib['name'] if package.attrib['name']: cov_entry = "%s/%s" % (package.attrib['name'], cov_entry) - cov_entry = cov_entry.replace("/",".") + cov_entry = cov_entry.replace("/", ".") cov_entry = cov_entry.replace(".java", "") executed_lines = [] missing_lines = [] for line in cl.findall('line'): if line.attrib['ci'] > "0": - executed_lines.append(int(line.attrib['nr'])) + executed_lines.append((int(line.attrib['nr']), 1000)) else: - missing_lines.append(int(line.attrib['nr'])) + missing_lines.append((int(line.attrib['nr']), 0)) cp.file_map[cov_entry] = executed_lines cp.dual_file_map[cov_entry] = dict() From 9a28282cff692805503ce7fa3a322de4a36e980b Mon Sep 17 00:00:00 2001 From: Arthur Chan Date: Mon, 21 Nov 2022 17:12:08 +0000 Subject: [PATCH 11/15] Fix bugs and code formatting Signed-off-by: Arthur Chan --- src/fuzz_introspector/analysis.py | 2 +- src/fuzz_introspector/code_coverage.py | 8 ++++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/fuzz_introspector/analysis.py b/src/fuzz_introspector/analysis.py index 24624a509..ca933a482 100644 --- a/src/fuzz_introspector/analysis.py +++ b/src/fuzz_introspector/analysis.py @@ -185,7 +185,7 @@ def get_node_coverage_hitcount( if not profile.func_is_entrypoint(demangled_name): raise AnalysisError( - "First node in calltree seems to be non-fuzzer function" + "First node in calltree is non-fuzzer function" ) coverage_data = profile.coverage.get_hit_details(demangled_name) diff --git a/src/fuzz_introspector/code_coverage.py b/src/fuzz_introspector/code_coverage.py index 2a4962f0d..44abae3b8 100644 --- a/src/fuzz_introspector/code_coverage.py +++ b/src/fuzz_introspector/code_coverage.py @@ -606,16 +606,20 @@ def load_jvm_coverage( cov_entry = cov_entry.replace(".java", "") executed_lines = [] missing_lines = [] + d_executed_lines = [] + d_missing_lines = [] for line in cl.findall('line'): if line.attrib['ci'] > "0": executed_lines.append((int(line.attrib['nr']), 1000)) + d_executed_lines.append(int(line.attrib['nr'])) else: missing_lines.append((int(line.attrib['nr']), 0)) + d_missing_lines.append(int(line.attrib['nr'])) cp.file_map[cov_entry] = executed_lines cp.dual_file_map[cov_entry] = dict() - cp.dual_file_map[cov_entry]['executed_lines'] = executed_lines - cp.dual_file_map[cov_entry]['missing_lines'] = missing_lines + cp.dual_file_map[cov_entry]['executed_lines'] = d_executed_lines + cp.dual_file_map[cov_entry]['missing_lines'] = d_missing_lines return cp From 8c8b02cd9e03491136bec02bae180833d7faa9be Mon Sep 17 00:00:00 2001 From: Arthur Chan Date: Mon, 21 Nov 2022 17:38:22 +0000 Subject: [PATCH 12/15] Fix formatting Signed-off-by: Arthur Chan --- src/fuzz_introspector/analysis.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fuzz_introspector/analysis.py b/src/fuzz_introspector/analysis.py index ca933a482..30dcfc705 100644 --- a/src/fuzz_introspector/analysis.py +++ b/src/fuzz_introspector/analysis.py @@ -185,7 +185,7 @@ def get_node_coverage_hitcount( if not profile.func_is_entrypoint(demangled_name): raise AnalysisError( - "First node in calltree is non-fuzzer function" + "First node in calltree is non-fuzzer function" ) coverage_data = profile.coverage.get_hit_details(demangled_name) From a06cfc2c407d571afaf19f30174a549e5e1e8a80 Mon Sep 17 00:00:00 2001 From: Arthur Chan Date: Tue, 22 Nov 2022 19:11:19 +0000 Subject: [PATCH 13/15] Add comments and combine redundant code Signed-off-by: Arthur Chan --- src/fuzz_introspector/code_coverage.py | 146 ++++++++++++------------- 1 file changed, 68 insertions(+), 78 deletions(-) diff --git a/src/fuzz_introspector/code_coverage.py b/src/fuzz_introspector/code_coverage.py index 44abae3b8..9bf9b88ce 100644 --- a/src/fuzz_introspector/code_coverage.py +++ b/src/fuzz_introspector/code_coverage.py @@ -205,49 +205,22 @@ def _python_ast_funcname_to_cov_file( return target_key - def correlate_python_functions_with_coverage( + def _retrieve_func_line( self, - function_list, - ) -> None: - - logger.info("Correlating") - # For each function identified in the ast identify the file - # where it resides in with respect to the filepaths from the - # coverage collection. Store this including the linumber - # of the function definition. - file_and_function_mappings: Dict[str, List[Tuple[str, int]]] = dict() - for func_key in function_list: - func = function_list[func_key] - function_name = func.function_name - function_line = func.function_linenumber - - logger.debug(f"Correlated init: {function_name} ---- {function_line}") - cov_file = self._python_ast_funcname_to_cov_file(function_name) - if cov_file is None: - continue - - # Return False if file is not in file_map - if cov_file not in self.file_map: - logger.debug("Target key is not in file_map") - continue - - if cov_file not in file_and_function_mappings: - file_and_function_mappings[cov_file] = [] - - file_and_function_mappings[cov_file].append( - (function_name, function_line) - ) - + file_and_function_mappings, + ) -> Dict[str, List[Tuple[str, int, int]]]: # Sort function and lines numbers for each coverage file. # Store in function_internals. - logger.debug("Function intervals") + logger.debug("Geting function start and end line") function_internals: Dict[str, List[Tuple[str, int, int]]] = dict() for cov_file, function_specs in file_and_function_mappings.items(): + # Sort by line number sorted_func_specs = list(sorted(function_specs, key=lambda x: x[1])) function_internals[cov_file] = [] for i in range(len(sorted_func_specs)): fname, fstart = sorted_func_specs[i] + # Get next function lineno to identify boundary if i < len(sorted_func_specs) - 1: fnext_name, fnext_start = sorted_func_specs[i + 1] @@ -258,14 +231,19 @@ def correlate_python_functions_with_coverage( # Last function identified by end lineno being -1 function_internals[cov_file].append((fname, fstart, -1)) - # Map the source codes of each line with coverage information. - # Store the result in covmap to be compatible with other languages. + return function_internals + + def _map_func_covmap( + self, + function_internals, + ) -> None: for filename in function_internals: logger.debug(f"Filename: {filename}") for fname, fstart, fend in function_internals[filename]: logger.debug(f"--- {fname} ::: {fstart} ::: {fend}") if fname not in self.covmap: + # Fail safe self.covmap[fname] = [] # If we have the file in dual_file_map identify the @@ -275,13 +253,55 @@ def correlate_python_functions_with_coverage( # Create the covmap for exec_line in self.dual_file_map[filename]['executed_lines']: - if exec_line > fstart and exec_line < fend: + if (exec_line > fstart) and (exec_line < fend or fend == -1): logger.debug(f"E: {exec_line}") self.covmap[fname].append((exec_line, 1000)) for non_exec_line in self.dual_file_map[filename]['missing_lines']: - if non_exec_line > fstart and non_exec_line < fend: + if (non_exec_line > fstart) and (non_exec_line < fend or fend == -1): logger.debug(f"N: {non_exec_line}") self.covmap[fname].append((non_exec_line, 0)) + + + def correlate_python_functions_with_coverage( + self, + function_list, + ) -> None: + + logger.info("Correlating") + # For each function identified in the ast identify the file + # where it resides in with respect to the filepaths from the + # coverage collection. Store this including the linumber + # of the function definition. + file_and_function_mappings: Dict[str, List[Tuple[str, int]]] = dict() + for func_key in function_list: + func = function_list[func_key] + function_name = func.function_name + function_line = func.function_linenumber + + logger.debug(f"Correlated init: {function_name} ---- {function_line}") + cov_file = self._python_ast_funcname_to_cov_file(function_name) + if cov_file is None: + continue + + # Return False if file is not in file_map + if cov_file not in self.file_map: + logger.debug("Target key is not in file_map") + continue + + if cov_file not in file_and_function_mappings: + file_and_function_mappings[cov_file] = [] + + file_and_function_mappings[cov_file].append( + (function_name, function_line) + ) + + # Sort and retrieve line range of all functions + function_internals = self.retrieve_func_line(file_and_function_mappings) + + # Map the source codes of each line with coverage information. + # Store the result in covmap to be compatible with other languages. + self._map_func_covmap(function_internals) + return def correlate_jvm_method_with_coverage( @@ -308,50 +328,12 @@ def correlate_jvm_method_with_coverage( (function_name, function_line) ) - logger.debug("Geting function start and end line") - function_internals: Dict[str, List[Tuple[str, int, int]]] = dict() - for cov_file, function_specs in file_and_function_mappings.items(): - # Sort by line number - sorted_func_specs = list(sorted(function_specs, key=lambda x: x[1])) - - function_internals[cov_file] = [] - for i in range(len(sorted_func_specs)): - fname, fstart = sorted_func_specs[i] - - # Get next function lineno to identify boundary - if i < len(sorted_func_specs) - 1: - fnext_name, fnext_start = sorted_func_specs[i + 1] - function_internals[cov_file].append( - (fname, fstart, fnext_start - 1) - ) - else: - # Last function identified by end lineno being -1 - function_internals[cov_file].append((fname, fstart, -1)) + # Sort and retrieve line range of all functions + function_internals = self.retrieve_func_line(file_and_function_mappings) # Map the source codes of each line with coverage information. # Store the result in covmap to be compatible with other languages. - for filename in function_internals: - logger.debug(f"Filename: {filename}") - for fname, fstart, fend in function_internals[filename]: - logger.debug(f"--- {fname} ::: {fstart} ::: {fend}") - if fname not in self.covmap: - # Fail safe - self.covmap[fname] = [] - - # If we have the file in dual_file_map identify the - # executed vs non-executed lines and store in covmap. - if filename not in self.dual_file_map: - continue - - # Create the covmap - for exec_line in self.dual_file_map[filename]['executed_lines']: - if (exec_line > fstart) and (exec_line < fend or fend == -1): - logger.debug(f"E: {exec_line}") - self.covmap[fname].append((exec_line, 1000)) - for non_exec_line in self.dual_file_map[filename]['missing_lines']: - if (non_exec_line > fstart) and (non_exec_line < fend or fend == -1): - logger.debug(f"N: {non_exec_line}") - self.covmap[fname].append((non_exec_line, 0)) + self._map_func_covmap(function_internals) return @@ -580,6 +562,14 @@ def load_jvm_coverage( target_dir: str, target_name: Optional[str] = None ) -> CoverageProfile: + """Find and load jacoco.xml, a jvm xml coverage report file + + The xml file is generated from Jacoco plugin. The specific dtd of the xml can + be found in the following link: + - https://www.jacoco.org/jacoco/trunk/coverage/report.dtd + + Return a CoverageProfile + """ import xml.etree.ElementTree as ET cp = CoverageProfile() cp.set_type("file") From 6e241db77acbb28b12262dfd8dd9ebc41bae8eac Mon Sep 17 00:00:00 2001 From: Arthur Chan Date: Tue, 22 Nov 2022 19:22:22 +0000 Subject: [PATCH 14/15] Fix bug Signed-off-by: Arthur Chan --- src/fuzz_introspector/code_coverage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fuzz_introspector/code_coverage.py b/src/fuzz_introspector/code_coverage.py index d3b5e9df3..61db84d63 100644 --- a/src/fuzz_introspector/code_coverage.py +++ b/src/fuzz_introspector/code_coverage.py @@ -296,7 +296,7 @@ def correlate_python_functions_with_coverage( ) # Sort and retrieve line range of all functions - function_internals = self.retrieve_func_line(file_and_function_mappings) + function_internals = self._retrieve_func_line(file_and_function_mappings) # Map the source codes of each line with coverage information. # Store the result in covmap to be compatible with other languages. From 70a1bf4d0804b5d1deee351815ce6035b3c443bf Mon Sep 17 00:00:00 2001 From: Arthur Chan Date: Tue, 22 Nov 2022 19:23:40 +0000 Subject: [PATCH 15/15] Fix formatting Signed-off-by: Arthur Chan --- src/fuzz_introspector/code_coverage.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/fuzz_introspector/code_coverage.py b/src/fuzz_introspector/code_coverage.py index 61db84d63..ca40feab6 100644 --- a/src/fuzz_introspector/code_coverage.py +++ b/src/fuzz_introspector/code_coverage.py @@ -261,7 +261,6 @@ def _map_func_covmap( logger.debug(f"N: {non_exec_line}") self.covmap[fname].append((non_exec_line, 0)) - def correlate_python_functions_with_coverage( self, function_list,