ossf · DavidKorczynski · Nov 22, 2022 · Nov 16, 2022 · Nov 16, 2022 · Nov 16, 2022
diff --git a/src/fuzz_introspector/analysis.py b/src/fuzz_introspector/analysis.py
@@ -183,14 +183,12 @@ def get_node_coverage_hitcount(
         # As this is the first node ensure it is indeed the entrypoint.
         # The difference is this node has node "parent" or prior nodes.
 
-        # Arthur: Not true anymore for cpp or jvm with multiple yaml file
-        # TODO Relaxing this constraint temporary, fixed later
-        # if not profile.func_is_entrypoint(demangled_name):
-        #     raise AnalysisError(
-        #         "First node in calltree seems to be non-fuzzer function"
-        #     )
-
+        if not profile.func_is_entrypoint(demangled_name):
+            raise AnalysisError(
+                "First node in calltree is non-fuzzer function"
+            )
         coverage_data = profile.coverage.get_hit_details(demangled_name)
+
         if len(coverage_data) == 0:
             logger.error("There is no coverage data (not even all negative).")
         node.cov_parent = "EP"
@@ -224,7 +222,6 @@ def get_node_coverage_hitcount(
             if ih:
                 node_hitcount = 200
         elif profile.target_lang == "jvm":
-            # TODO Handle for jvm added later
             coverage_data = profile.coverage.get_hit_details(
                 callstack_get_parent(node, callstack)
             )
@@ -238,6 +235,7 @@ def get_node_coverage_hitcount(
         raise AnalysisError(
             "A node should either be the first or it must have a parent"
         )
+
     return node_hitcount
 
 

diff --git a/src/fuzz_introspector/code_coverage.py b/src/fuzz_introspector/code_coverage.py
@@ -168,6 +168,7 @@ def get_hit_details(self, funcname: str) -> List[Tuple[int, int]]:
 
         if fuzz_key is None or fuzz_key not in self.covmap:
             return []
+
         return self.covmap[fuzz_key]
 
     def _python_ast_funcname_to_cov_file(
@@ -204,49 +205,22 @@ def _python_ast_funcname_to_cov_file(
 
         return target_key
 
-    def correlate_python_functions_with_coverage(
+    def _retrieve_func_line(
         self,
-        function_list,
-    ) -> None:
-
-        logger.info("Correlating")
-        # For each function identified in the ast identify the file
-        # where it resides in with respect to the filepaths from the
-        # coverage collection. Store this including the linumber
-        # of the function definition.
-        file_and_function_mappings: Dict[str, List[Tuple[str, int]]] = dict()
-        for func_key in function_list:
-            func = function_list[func_key]
-            function_name = func.function_name
-            function_line = func.function_linenumber
-
-            logger.debug(f"Correlated init: {function_name} ---- {function_line}")
-            cov_file = self._python_ast_funcname_to_cov_file(function_name)
-            if cov_file is None:
-                continue
-
-            # Return False if file is not in file_map
-            if cov_file not in self.file_map:
-                logger.debug("Target key is not in file_map")
-                continue
-
-            if cov_file not in file_and_function_mappings:
-                file_and_function_mappings[cov_file] = []
-
-            file_and_function_mappings[cov_file].append(
-                (function_name, function_line)
-            )
-
+        file_and_function_mappings,
+    ) -> Dict[str, List[Tuple[str, int, int]]]:
         # Sort function and lines numbers for each coverage file.
         # Store in function_internals.
-        logger.debug("Function intervals")
+        logger.debug("Geting function start and end line")
         function_internals: Dict[str, List[Tuple[str, int, int]]] = dict()
         for cov_file, function_specs in file_and_function_mappings.items():
+            # Sort by line number
             sorted_func_specs = list(sorted(function_specs, key=lambda x: x[1]))
 
             function_internals[cov_file] = []
             for i in range(len(sorted_func_specs)):
                 fname, fstart = sorted_func_specs[i]
+
                 # Get next function lineno to identify boundary
                 if i < len(sorted_func_specs) - 1:
                     fnext_name, fnext_start = sorted_func_specs[i + 1]
@@ -257,13 +231,19 @@ def correlate_python_functions_with_coverage(
                     # Last function identified by end lineno being -1
                     function_internals[cov_file].append((fname, fstart, -1))
 
-        # Map the source codes of each line with coverage information.
-        # Store the result in covmap to be compatible with other languages.
+        return function_internals
+
+    def _map_func_covmap(
+        self,
+        function_internals,
+    ) -> None:
         for filename in function_internals:
             logger.debug(f"Filename: {filename}")
             for fname, fstart, fend in function_internals[filename]:
                 logger.debug(f"--- {fname} ::: {fstart} ::: {fend}")
+
                 if fname not in self.covmap:
+                    # Fail safe
                     self.covmap[fname] = []
 
                 # If we have the file in dual_file_map identify the
@@ -273,13 +253,87 @@ def correlate_python_functions_with_coverage(
 
                 # Create the covmap
                 for exec_line in self.dual_file_map[filename]['executed_lines']:
-                    if exec_line > fstart and (exec_line < fend or fend == -1):
+                    if (exec_line > fstart) and (exec_line < fend or fend == -1):
                         logger.debug(f"E: {exec_line}")
                         self.covmap[fname].append((exec_line, 1000))
                 for non_exec_line in self.dual_file_map[filename]['missing_lines']:
-                    if non_exec_line > fstart and (non_exec_line < fend or fend == -1):
+                    if (non_exec_line > fstart) and (non_exec_line < fend or fend == -1):
                         logger.debug(f"N: {non_exec_line}")
                         self.covmap[fname].append((non_exec_line, 0))
+
+    def correlate_python_functions_with_coverage(
+        self,
+        function_list,
+    ) -> None:
+        """Correlate functions in function_list with per-file coverage data"""
+        logger.info("Correlating")
+        # For each function identified in the ast identify the file
+        # where it resides in with respect to the filepaths from the
+        # coverage collection. Store this including the line number
+        # of the function definition.
+        file_and_function_mappings: Dict[str, List[Tuple[str, int]]] = dict()
+        for func_key in function_list:
+            func = function_list[func_key]
+            function_name = func.function_name
+            function_line = func.function_linenumber
+
+            logger.debug(f"Correlated init: {function_name} ---- {function_line}")
+            cov_file = self._python_ast_funcname_to_cov_file(function_name)
+            if cov_file is None:
+                continue
+
+            # Skip functions whose coverage file is not in file_map
+            if cov_file not in self.file_map:
+                logger.debug("Target key is not in file_map")
+                continue
+
+            if cov_file not in file_and_function_mappings:
+                file_and_function_mappings[cov_file] = []
+
+            file_and_function_mappings[cov_file].append(
+                (function_name, function_line)
+            )
+
+        # Sort and retrieve line range of all functions
+        function_internals = self._retrieve_func_line(file_and_function_mappings)
+
+        # Map the source codes of each line with coverage information.
+        # Store the result in covmap to be compatible with other languages.
+        self._map_func_covmap(function_internals)
+
+        return
+
+    def correlate_jvm_method_with_coverage(
+        self,
+        function_list,
+    ) -> None:
+        """Correlate JVM methods in function_list with coverage keyed by class"""
+        logger.debug("Correlating JVM")
+        file_and_function_mappings: Dict[str, List[Tuple[str, int]]] = dict()
+        for (func_key, func) in function_list.items():
+            function_name = func.function_name
+            function_line = func.function_linenumber
+            class_name = func.function_source_file
+            logger.debug(f"Correlated init: {class_name} ---- {function_name} ---- {function_line}")
+            # Skip methods whose source class has no entry in file_map
+            if class_name not in self.file_map:
+                logger.debug("Fail to find matching class")
+                continue
+
+            if class_name not in file_and_function_mappings:
+                file_and_function_mappings[class_name] = []
+
+            file_and_function_mappings[class_name].append(
+                (function_name, function_line)
+            )
+
+        # Sort and retrieve line range of all functions
+        function_internals = self._retrieve_func_line(file_and_function_mappings)
+
+        # Map the source codes of each line with coverage information.
+        # Store the result in covmap to be compatible with other languages.
+        self._map_func_covmap(function_internals)
+
         return
 
     def get_hit_summary(
@@ -503,6 +557,62 @@ def load_python_json_coverage(
     return cp
 
 
+def load_jvm_coverage(
+    target_dir: str,
+    target_name: Optional[str] = None
+) -> CoverageProfile:
+    """Find and load jacoco.xml, a jvm xml coverage report file
+
+    The xml file is generated from Jacoco plugin. The specific dtd of the xml can
+    be found in the following link:
+    - https://www.jacoco.org/jacoco/trunk/coverage/report.dtd
+
+    Only the first report found is loaded; target_name is currently unused.
+    Return a CoverageProfile, without coverage data when no report is found.
+    """
+    import xml.etree.ElementTree as ET
+    cp = CoverageProfile()
+    cp.set_type("file")
+
+    coverage_reports = utils.get_all_files_in_tree_with_regex(target_dir, "jacoco.xml")
+    logger.info(f"FOUND XML COVERAGE FILES: {str(coverage_reports)}")
+    if len(coverage_reports) == 0:
+        logger.info("Found no coverage files")
+        return cp
+
+    xml_file = coverage_reports[0]
+    cp.coverage_files.append(xml_file)
+    root = ET.parse(xml_file).getroot()
+
+    for package in root.findall('package'):
+        for cl in package.findall('sourcefile'):
+            cov_entry = cl.attrib['name']
+            # Strip only a trailing ".java" so names like "org.javaparser" survive
+            if cov_entry.endswith(".java"):
+                cov_entry = cov_entry[:-len(".java")]
+            if package.attrib['name']:
+                cov_entry = "%s/%s" % (package.attrib['name'], cov_entry)
+            cov_entry = cov_entry.replace("/", ".")
+            executed_lines = []
+            d_executed_lines = []
+            d_missing_lines = []
+            for line in cl.findall('line'):
+                line_no = int(line.attrib['nr'])
+                # Compare the "ci" counter as a number, not as a string
+                if int(line.attrib['ci']) > 0:
+                    executed_lines.append((line_no, 1000))
+                    d_executed_lines.append(line_no)
+                else:
+                    d_missing_lines.append(line_no)
+
+            cp.file_map[cov_entry] = executed_lines
+            cp.dual_file_map[cov_entry] = dict()
+            cp.dual_file_map[cov_entry]['executed_lines'] = d_executed_lines
+            cp.dual_file_map[cov_entry]['missing_lines'] = d_missing_lines
+
+    return cp
+
+
 if __name__ == "__main__":
     logging.basicConfig()
     logger.info("Starting coverage loader")

diff --git a/src/fuzz_introspector/data_loader.py b/src/fuzz_introspector/data_loader.py
@@ -95,7 +95,10 @@ def add_func_to_reached_and_clone(
     # Update hitcount of all functions reached by the function
     for func_name in func_to_add.functions_reached:
         if func_name not in merged_profile.all_functions:
-            logger.error(f"Mismatched function name: {func_name}")
+            if merged_profile_old.profiles[0].target_lang == "jvm":
+                logger.debug(f"{func_name} not provided within classpath")
+            else:
+                logger.error(f"Mismatched function name: {func_name}")
             continue
         f = merged_profile.all_functions[func_name]
         f.hitcount += 1
@@ -110,7 +113,10 @@ def add_func_to_reached_and_clone(
         uncovered_cc = 0
         for reached_func_name in f_profile.functions_reached:
             if reached_func_name not in merged_profile.all_functions:
-                logger.error(f"Mismatched function name: {reached_func_name}")
+                if merged_profile_old.profiles[0].target_lang == "jvm":
+                    logger.debug(f"{reached_func_name} not provided within classpath")
+                else:
+                    logger.error(f"Mismatched function name: {reached_func_name}")
                 continue
             f_reached = merged_profile.all_functions[reached_func_name]
             cc += f_reached.cyclomatic_complexity

diff --git a/src/fuzz_introspector/datatypes/fuzzer_profile.py b/src/fuzz_introspector/datatypes/fuzzer_profile.py
@@ -101,8 +101,8 @@ def identifier(self):
             return os.path.basename(self.fuzzer_source_file).replace(".py", "")
 
         elif self._target_lang == "jvm":
-            # TODO Handle jvm fuzzer source file
-            pass
+            # Class name is used for jvm identifier
+            return os.path.basename(self.fuzzer_source_file)
 
         return self.fuzzer_source_file
 
@@ -150,8 +150,12 @@ def resolve_coverage_link(
                 function_name
             )
         elif self.target_lang == "jvm":
-            # TODO Add coverage report for JVM
-            pass
+            return self._resolve_jvm_coverage_link(
+                cov_url,
+                source_file,
+                lineno,
+                function_name
+            )
         else:
             logger.info("Could not find any html_status.json file")
         return "#"
@@ -364,9 +368,11 @@ def get_cov_metrics(
             total_func_lines, hit_lines = self.coverage.get_hit_summary(funcname)
             if total_func_lines is None or hit_lines is None:
                 return None, None, None
-
-            hit_percentage = (hit_lines / total_func_lines) * 100.0
-            return total_func_lines, hit_lines, hit_percentage
+            if total_func_lines == 0:
+                return 0, 0, 0
+            else:
+                hit_percentage = (hit_lines / total_func_lines) * 100.0
+                return total_func_lines, hit_lines, hit_percentage
         except Exception:
             return None, None, None
 
@@ -440,11 +446,14 @@ def _load_coverage(self, target_folder: str) -> None:
                     self.all_class_functions
                 )
         elif self.target_lang == "jvm":
-            # TODO Add JVM coverage loading support
-            self.coverage = code_coverage.load_llvm_coverage(
+            self.coverage = code_coverage.load_jvm_coverage(
                 target_folder,
                 self.identifier
             )
+            if self.coverage is not None:
+                self.coverage.correlate_jvm_method_with_coverage(
+                    self.all_class_functions
+                )
         else:
             raise DataLoaderError(
                 "The profile target has no coverage loading support"
@@ -556,3 +565,21 @@ def _resolve_python_coverage_link(
         else:
             logger.info("Could not find any html_status.json file")
         return "#"
+
+    def _resolve_jvm_coverage_link(
+        self,
+        cov_url: str,
+        source_file: str,
+        lineno: int,
+        function_name: str
+    ) -> str:
+        """Resolves link to HTML coverage report for JVM targets"""
+        # source_file is presumably a dotted class name - TODO confirm
+        if ("." in source_file):
+            # Source file has package, change all . to path separator
+            source_file = source_file.replace(".", os.sep)
+        else:
+            # Source file has no package, add in default package
+            source_file = os.path.join("default", source_file)
+        # NOTE(review): function_name is unused; os.sep is "/" only on POSIX
+        return cov_url + os.sep + source_file + ".html#L" + str(lineno)