From 58b7f1233324530ee701c0bdac7f1682e6f94ee6 Mon Sep 17 00:00:00 2001
From: Arthur Chan <arthur.chan@adalogics.com>
Date: Wed, 16 Nov 2022 16:46:25 +0000
Subject: [PATCH 01/15] Create dummy handler for jvm coverage report

Signed-off-by: Arthur Chan <arthur.chan@adalogics.com>
---
 src/fuzz_introspector/code_coverage.py        | 25 ++++++++++++++++++
 .../datatypes/fuzzer_profile.py               | 26 ++++++++++++++-----
 2 files changed, 45 insertions(+), 6 deletions(-)

diff --git a/src/fuzz_introspector/code_coverage.py b/src/fuzz_introspector/code_coverage.py
index e7bf33530..aa476cb2a 100644
--- a/src/fuzz_introspector/code_coverage.py
+++ b/src/fuzz_introspector/code_coverage.py
@@ -502,6 +502,31 @@ def load_python_json_coverage(
 
     return cp
 
+def load_jvm_coverage(
+    target_dir: str,
+    target_name: Optional[str] = None
+) -> CoverageProfile:
+    """
+    Scans a directory to read one or more coverage reports, and returns a CoverageProfile
+    Parses output from "llvm-cov show", e.g.
+        llvm-cov show -instr-profile=$profdata_file -object=$target \
+          -line-coverage-gt=0 $shared_libraries $LLVM_COV_COMMON_ARGS > \
+          ${FUZZER_STATS_DIR}/$target.covreport
+    This is used to parse JVM coverage.
+    The function supports loading multiple and individual coverage reports.
+    This is needed because finding coverage on a per-fuzzer basis requires
+    correlating binary files to a specific introspection profile from compile time.
+    However, files could be moved around, renamed, and so on.
+    As such, this function accepts an arugment "target_name" which is used to
+    target specific coverage profiles. However, if no coverage profile matches
+    that given name then the function will find *all* coverage reports it can and
+    use all of them.
+    """
+
+    cp = CoverageProfile()
+
+    return cp
+
 
 if __name__ == "__main__":
     logging.basicConfig()
diff --git a/src/fuzz_introspector/datatypes/fuzzer_profile.py b/src/fuzz_introspector/datatypes/fuzzer_profile.py
index 15ba658ac..c6b5f6ec3 100644
--- a/src/fuzz_introspector/datatypes/fuzzer_profile.py
+++ b/src/fuzz_introspector/datatypes/fuzzer_profile.py
@@ -101,8 +101,8 @@ def identifier(self):
             return os.path.basename(self.fuzzer_source_file).replace(".py", "")
 
         elif self._target_lang == "jvm":
-            # TODO Handle jvm fuzzer source file
-            pass
+            # Class name is used for jvm identifier
+            return os.path.basename(self.fuzzer_source_file)
 
         return self.fuzzer_source_file
 
@@ -150,8 +150,12 @@ def resolve_coverage_link(
                 function_name
             )
         elif self.target_lang == "jvm":
-            # TODO Add coverage report for JVM
-            pass
+            return self._resolve_jvm_coverage_link(
+                cov_url,
+                source_file,
+                lineno,
+                function_name
+            )
         else:
             logger.info("Could not find any html_status.json file")
         return "#"
@@ -440,8 +444,7 @@ def _load_coverage(self, target_folder: str) -> None:
                     self.all_class_functions
                 )
         elif self.target_lang == "jvm":
-            # TODO Add JVM coverage loading support
-            self.coverage = code_coverage.load_llvm_coverage(
+            self.coverage = code_coverage.load_jvm_coverage(
                 target_folder,
                 self.identifier
             )
@@ -556,3 +559,14 @@ def _resolve_python_coverage_link(
         else:
             logger.info("Could not find any html_status.json file")
         return "#"
+
+    def _resolve_jvm_coverage_link(
+        self,
+        cov_url: str,
+        source_file: str,
+        lineno: int,
+        function_name: str
+    ) -> str:
+        """Resolves link to HTML coverage report for JVM targets"""
+        # TODO Add in cg to coverage report link for JVM
+        return "#"

From af5dd39032645640e22fa60851401b2dba8845a8 Mon Sep 17 00:00:00 2001
From: Arthur Chan <arthur.chan@adalogics.com>
Date: Wed, 16 Nov 2022 17:16:59 +0000
Subject: [PATCH 02/15] Add jvm resolve coverage link logic

Signed-off-by: Arthur Chan <arthur.chan@adalogics.com>
---
 .../datatypes/project_profile.py              | 23 +++++++++----------
 1 file changed, 11 insertions(+), 12 deletions(-)

diff --git a/src/fuzz_introspector/datatypes/project_profile.py b/src/fuzz_introspector/datatypes/project_profile.py
index c28fb14f8..1d4a32d68 100644
--- a/src/fuzz_introspector/datatypes/project_profile.py
+++ b/src/fuzz_introspector/datatypes/project_profile.py
@@ -181,27 +181,26 @@ def resolve_coverage_report_link(
         func_name
     ):
 
-        if self.target_lang == "c-cpp":
-            return "%s%s.html#L%d" % (
+        if self.target_lang == "python":
+            return self.profiles[0].resolve_coverage_link(
                 coverage_url,
                 function_source_file,
-                lineno
+                lineno,
+                func_name
             )
-        elif self.target_lang == "python":
+        elif self.target_lang == "jvm":
             return self.profiles[0].resolve_coverage_link(
                 coverage_url,
                 function_source_file,
                 lineno,
                 func_name
             )
-        elif self.target_lang == "jvm":
-            # TODO Add coverage details for jvm
-            pass
-        return "%s%s.html#L%d" % (
-            coverage_url,
-            function_source_file,
-            lineno
-        )
+        else:
+            return "%s%s.html#L%d" % (
+                coverage_url,
+                function_source_file,
+                lineno
+            )
 
     @property
     def target_lang(self):

From 4a85e7279ecc16d1ac1be8a154a5b7747524563c Mon Sep 17 00:00:00 2001
From: Arthur Chan <arthur.chan@adalogics.com>
Date: Wed, 16 Nov 2022 18:04:11 +0000
Subject: [PATCH 03/15] Connect calltree to jacoco coverage report link

Signed-off-by: Arthur Chan <arthur.chan@adalogics.com>
---
 src/fuzz_introspector/datatypes/fuzzer_profile.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/src/fuzz_introspector/datatypes/fuzzer_profile.py b/src/fuzz_introspector/datatypes/fuzzer_profile.py
index c6b5f6ec3..a956c0b20 100644
--- a/src/fuzz_introspector/datatypes/fuzzer_profile.py
+++ b/src/fuzz_introspector/datatypes/fuzzer_profile.py
@@ -568,5 +568,12 @@ def _resolve_jvm_coverage_link(
         function_name: str
     ) -> str:
         """Resolves link to HTML coverage report for JVM targets"""
-        # TODO Add in cg to coverage report link for JVM
-        return "#"
+        # Handle source class for jvm
+        if ("." in source_file):
+            # Source file has package, change all . to path separator
+            source_file = source_file.replace(".", os.sep)
+        else:
+            # Source fil has no package, add in default package
+            source_file = os.path.join("default", source_file)
+
+        return cov_url + os.sep + source_file + ".html#L" + str(lineno)

From 5898a5fe479727a00936e2e6f4d7e58b6d22ba84 Mon Sep 17 00:00:00 2001
From: Arthur Chan <arthur.chan@adalogics.com>
Date: Wed, 16 Nov 2022 18:09:12 +0000
Subject: [PATCH 04/15] Fix formatting

Signed-off-by: Arthur Chan <arthur.chan@adalogics.com>
---
 src/fuzz_introspector/code_coverage.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/fuzz_introspector/code_coverage.py b/src/fuzz_introspector/code_coverage.py
index aa476cb2a..abcf26c22 100644
--- a/src/fuzz_introspector/code_coverage.py
+++ b/src/fuzz_introspector/code_coverage.py
@@ -502,6 +502,7 @@ def load_python_json_coverage(
 
     return cp
 
+
 def load_jvm_coverage(
     target_dir: str,
     target_name: Optional[str] = None

From 11d91ac9173484e1dbb1bac74876b2929e2b4422 Mon Sep 17 00:00:00 2001
From: Arthur Chan <arthur.chan@adalogics.com>
Date: Thu, 17 Nov 2022 14:33:55 +0000
Subject: [PATCH 05/15] Retrieving coverage from jacoco.xml

Signed-off-by: Arthur Chan <arthur.chan@adalogics.com>
---
 src/fuzz_introspector/code_coverage.py | 48 +++++++++++++++++---------
 1 file changed, 31 insertions(+), 17 deletions(-)

diff --git a/src/fuzz_introspector/code_coverage.py b/src/fuzz_introspector/code_coverage.py
index abcf26c22..310c4af57 100644
--- a/src/fuzz_introspector/code_coverage.py
+++ b/src/fuzz_introspector/code_coverage.py
@@ -507,24 +507,38 @@ def load_jvm_coverage(
     target_dir: str,
     target_name: Optional[str] = None
 ) -> CoverageProfile:
-    """
-    Scans a directory to read one or more coverage reports, and returns a CoverageProfile
-    Parses output from "llvm-cov show", e.g.
-        llvm-cov show -instr-profile=$profdata_file -object=$target \
-          -line-coverage-gt=0 $shared_libraries $LLVM_COV_COMMON_ARGS > \
-          ${FUZZER_STATS_DIR}/$target.covreport
-    This is used to parse JVM coverage.
-    The function supports loading multiple and individual coverage reports.
-    This is needed because finding coverage on a per-fuzzer basis requires
-    correlating binary files to a specific introspection profile from compile time.
-    However, files could be moved around, renamed, and so on.
-    As such, this function accepts an arugment "target_name" which is used to
-    target specific coverage profiles. However, if no coverage profile matches
-    that given name then the function will find *all* coverage reports it can and
-    use all of them.
-    """
-
+    import xml.etree.ElementTree as ET
     cp = CoverageProfile()
+    cp.set_type("file")
+
+    coverage_reports = utils.get_all_files_in_tree_with_regex(target_dir, "jacoco.xml")
+    logger.info(f"FOUND XML COVERAGE FILES: {str(coverage_reports)}")
+
+    if len(coverage_reports) > 0:
+        xml_file = coverage_reports[0]
+    else:
+        logger.info("Found no coverage files")
+        return cp
+
+    cp.coverage_files.append(xml_file)
+    xml_tree = ET.parse(xml_file)
+    root = xml_tree.getroot()
+
+    for package in root.findall('package'):
+        for cl in package.findall('sourcefile'):
+            cov_entry = cl.attrib['name'].replace(".java", "")
+            executed_lines = []
+            missing_lines = []
+            for line in cl.findall('line'):
+                if line.attrib['ci'] > "0":
+                    executed_lines.append(line.attrib['nr'])
+                else:
+                    missing_lines.append(line.attrib['nr'])
+
+            cp.file_map[cov_entry] = executed_lines
+            cp.dual_file_map[cov_entry] = dict()
+            cp.dual_file_map[cov_entry]['executed_lines'] = executed_lines
+            cp.dual_file_map[cov_entry]['missing_lines'] = missing_lines
 
     return cp
 

From b2bda5994f188ee6afb59d1abdabe5e116ee2f11 Mon Sep 17 00:00:00 2001
From: Arthur Chan <arthur.chan@adalogics.com>
Date: Thu, 17 Nov 2022 16:01:37 +0000
Subject: [PATCH 06/15] Fix extraction of jvm xml handling

Signed-off-by: Arthur Chan <arthur.chan@adalogics.com>
---
 src/fuzz_introspector/code_coverage.py | 78 +++++++++++++++++++++++++-
 1 file changed, 77 insertions(+), 1 deletion(-)

diff --git a/src/fuzz_introspector/code_coverage.py b/src/fuzz_introspector/code_coverage.py
index 310c4af57..2e25c29e1 100644
--- a/src/fuzz_introspector/code_coverage.py
+++ b/src/fuzz_introspector/code_coverage.py
@@ -168,6 +168,7 @@ def get_hit_details(self, funcname: str) -> List[Tuple[int, int]]:
 
         if fuzz_key is None or fuzz_key not in self.covmap:
             return []
+
         return self.covmap[fuzz_key]
 
     def _python_ast_funcname_to_cov_file(
@@ -282,6 +283,79 @@ def correlate_python_functions_with_coverage(
                         self.covmap[fname].append((non_exec_line, 0))
         return
 
+    def correlate_jvm_method_with_coverage(
+        self,
+        function_list,
+    ) -> None:
+        logger.debug("Correlating JVM")
+
+        file_and_function_mappings: Dict[str, List[Tuple[str, int]]] = dict()
+        for func_key in function_list:
+            func = function_list[func_key]
+            function_name = func.function_name
+            function_line = func.function_linenumber
+            class_name = func.function_source_file
+
+            logger.debug(f"Correlated init: {class_name} ---- {function_name} ---- {function_line}")
+
+            if class_name not in self.file_map:
+                logger.debug("Fail to find matching class")
+                continue
+
+            if class_name not in file_and_function_mappings:
+                file_and_function_mappings[class_name] = []
+
+            file_and_function_mappings[class_name].append(
+                (function_name, function_line)
+            )
+
+        logger.debug("Geting function start and end line")
+        function_internals: Dict[str, List[Tuple[str, int, int]]] = dict()
+        for cov_file, function_specs in file_and_function_mappings.items():
+            # Sort by line number
+            sorted_func_specs = list(sorted(function_specs, key=lambda x: x[1]))
+
+            function_internals[cov_file] = []
+            for i in range(len(sorted_func_specs)):
+                fname, fstart = sorted_func_specs[i]
+                # Get next function lineno to identify boundary
+                if i < len(sorted_func_specs) - 1:
+                    fnext_name, fnext_start = sorted_func_specs[i + 1]
+                    function_internals[cov_file].append(
+                        (fname, fstart, fnext_start - 1)
+                    )
+                else:
+                    # Last function identified by end lineno being -1
+                    function_internals[cov_file].append((fname, fstart, -1))
+
+        # Map the source codes of each line with coverage information.
+        # Store the result in covmap to be compatible with other languages.
+        for filename in function_internals:
+            logger.debug(f"Filename: {filename}")
+            for fname, fstart, fend in function_internals[filename]:
+                logger.debug(f"--- {fname} ::: {fstart} ::: {fend}")
+                if fname not in self.covmap:
+                    # Fail safe
+                    self.covmap[fname] = []
+
+                # If we have the file in dual_file_map identify the
+                # executed vs non-executed lines and store in covmap.
+                if filename not in self.dual_file_map:
+                    continue
+
+                # Create the covmap
+                for exec_line in self.dual_file_map[filename]['executed_lines']:
+                    if int(exec_line) > fstart and int(exec_line) < fend:
+                        logger.info(f"E: {exec_line}")
+                        self.covmap[fname].append((exec_line, 1000))
+                for non_exec_line in self.dual_file_map[filename]['missing_lines']:
+                    if int(non_exec_line) > fstart and int(non_exec_line) < fend:
+                        logger.info(f"N: {non_exec_line}")
+                        self.covmap[fname].append((non_exec_line, 0))
+
+        return
+
+
     def get_hit_summary(
         self,
         funcname: str
@@ -526,7 +600,9 @@ def load_jvm_coverage(
 
     for package in root.findall('package'):
         for cl in package.findall('sourcefile'):
-            cov_entry = cl.attrib['name'].replace(".java", "")
+            cov_entry = "%s/%s" % (package.attrib['name'],cl.attrib['name'])
+            cov_entry = cov_entry.replace("/",".")
+            cov_entry = cov_entry.replace(".java", "")
             executed_lines = []
             missing_lines = []
             for line in cl.findall('line'):

From 3b14aac4e5c65ac693c588868928654ce14158f0 Mon Sep 17 00:00:00 2001
From: Arthur Chan <arthur.chan@adalogics.com>
Date: Thu, 17 Nov 2022 16:02:37 +0000
Subject: [PATCH 07/15] Add additional extraction logic for jvm coverage report

Signed-off-by: Arthur Chan <arthur.chan@adalogics.com>
---
 src/fuzz_introspector/datatypes/fuzzer_profile.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/fuzz_introspector/datatypes/fuzzer_profile.py b/src/fuzz_introspector/datatypes/fuzzer_profile.py
index a956c0b20..0c4fd0000 100644
--- a/src/fuzz_introspector/datatypes/fuzzer_profile.py
+++ b/src/fuzz_introspector/datatypes/fuzzer_profile.py
@@ -448,6 +448,10 @@ def _load_coverage(self, target_folder: str) -> None:
                 target_folder,
                 self.identifier
             )
+            if self.coverage is not None:
+                self.coverage.correlate_jvm_method_with_coverage(
+                    self.all_class_functions
+                )
         else:
             raise DataLoaderError(
                 "The profile target has no coverage loading support"

From 388763b10d9ec8361369cb9db146eaf204dca89b Mon Sep 17 00:00:00 2001
From: Arthur Chan <arthur.chan@adalogics.com>
Date: Mon, 21 Nov 2022 15:08:00 +0000
Subject: [PATCH 08/15] Fix bug for int/string type conversion; fix bug for
 file long function handling

Signed-off-by: Arthur Chan <arthur.chan@adalogics.com>
---
 src/fuzz_introspector/analysis.py      | 14 ++++++--------
 src/fuzz_introspector/code_coverage.py | 22 ++++++++++++----------
 2 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/src/fuzz_introspector/analysis.py b/src/fuzz_introspector/analysis.py
index 6e11408d9..24624a509 100644
--- a/src/fuzz_introspector/analysis.py
+++ b/src/fuzz_introspector/analysis.py
@@ -183,14 +183,12 @@ def get_node_coverage_hitcount(
         # As this is the first node ensure it is indeed the entrypoint.
         # The difference is this node has node "parent" or prior nodes.
 
-        # Arthur: Not true anymore for cpp or jvm with multiple yaml file
-        # TODO Relaxing this constraint temporary, fixed later
-        # if not profile.func_is_entrypoint(demangled_name):
-        #     raise AnalysisError(
-        #         "First node in calltree seems to be non-fuzzer function"
-        #     )
-
+        if not profile.func_is_entrypoint(demangled_name):
+            raise AnalysisError(
+                 "First node in calltree seems to be non-fuzzer function"
+            )
         coverage_data = profile.coverage.get_hit_details(demangled_name)
+
         if len(coverage_data) == 0:
             logger.error("There is no coverage data (not even all negative).")
         node.cov_parent = "EP"
@@ -224,7 +222,6 @@ def get_node_coverage_hitcount(
             if ih:
                 node_hitcount = 200
         elif profile.target_lang == "jvm":
-            # TODO Handle for jvm added later
             coverage_data = profile.coverage.get_hit_details(
                 callstack_get_parent(node, callstack)
             )
@@ -238,6 +235,7 @@ def get_node_coverage_hitcount(
         raise AnalysisError(
             "A node should either be the first or it must have a parent"
         )
+
     return node_hitcount
 
 
diff --git a/src/fuzz_introspector/code_coverage.py b/src/fuzz_introspector/code_coverage.py
index 2e25c29e1..fed5748bf 100644
--- a/src/fuzz_introspector/code_coverage.py
+++ b/src/fuzz_introspector/code_coverage.py
@@ -264,6 +264,7 @@ def correlate_python_functions_with_coverage(
             logger.debug(f"Filename: {filename}")
             for fname, fstart, fend in function_internals[filename]:
                 logger.debug(f"--- {fname} ::: {fstart} ::: {fend}")
+
                 if fname not in self.covmap:
                     self.covmap[fname] = []
 
@@ -290,12 +291,10 @@ def correlate_jvm_method_with_coverage(
         logger.debug("Correlating JVM")
 
         file_and_function_mappings: Dict[str, List[Tuple[str, int]]] = dict()
-        for func_key in function_list:
-            func = function_list[func_key]
+        for (func_key, func) in function_list.items():
             function_name = func.function_name
             function_line = func.function_linenumber
             class_name = func.function_source_file
-
             logger.debug(f"Correlated init: {class_name} ---- {function_name} ---- {function_line}")
 
             if class_name not in self.file_map:
@@ -318,6 +317,7 @@ def correlate_jvm_method_with_coverage(
             function_internals[cov_file] = []
             for i in range(len(sorted_func_specs)):
                 fname, fstart = sorted_func_specs[i]
+
                 # Get next function lineno to identify boundary
                 if i < len(sorted_func_specs) - 1:
                     fnext_name, fnext_start = sorted_func_specs[i + 1]
@@ -345,12 +345,12 @@ def correlate_jvm_method_with_coverage(
 
                 # Create the covmap
                 for exec_line in self.dual_file_map[filename]['executed_lines']:
-                    if int(exec_line) > fstart and int(exec_line) < fend:
-                        logger.info(f"E: {exec_line}")
+                    if (exec_line > fstart) and (exec_line < fend or fend == -1):
+                        logger.debug(f"E: {exec_line}")
                         self.covmap[fname].append((exec_line, 1000))
                 for non_exec_line in self.dual_file_map[filename]['missing_lines']:
-                    if int(non_exec_line) > fstart and int(non_exec_line) < fend:
-                        logger.info(f"N: {non_exec_line}")
+                    if (non_exec_line > fstart) and (non_exec_line < fend or fend == -1):
+                        logger.debug(f"N: {non_exec_line}")
                         self.covmap[fname].append((non_exec_line, 0))
 
         return
@@ -600,16 +600,18 @@ def load_jvm_coverage(
 
     for package in root.findall('package'):
         for cl in package.findall('sourcefile'):
-            cov_entry = "%s/%s" % (package.attrib['name'],cl.attrib['name'])
+            cov_entry = cl.attrib['name']
+            if package.attrib['name']:
+                cov_entry = "%s/%s" % (package.attrib['name'], cov_entry)
             cov_entry = cov_entry.replace("/",".")
             cov_entry = cov_entry.replace(".java", "")
             executed_lines = []
             missing_lines = []
             for line in cl.findall('line'):
                 if line.attrib['ci'] > "0":
-                    executed_lines.append(line.attrib['nr'])
+                    executed_lines.append(int(line.attrib['nr']))
                 else:
-                    missing_lines.append(line.attrib['nr'])
+                    missing_lines.append(int(line.attrib['nr']))
 
             cp.file_map[cov_entry] = executed_lines
             cp.dual_file_map[cov_entry] = dict()

From b8fadd1d137425f1104cb58b95665fa7e6a50601 Mon Sep 17 00:00:00 2001
From: Arthur Chan <arthur.chan@adalogics.com>
Date: Mon, 21 Nov 2022 15:47:45 +0000
Subject: [PATCH 09/15] Fix division by zero bug

Signed-off-by: Arthur Chan <arthur.chan@adalogics.com>
---
 src/fuzz_introspector/data_loader.py               | 10 ++++++++--
 src/fuzz_introspector/datatypes/fuzzer_profile.py  |  8 +++++---
 src/fuzz_introspector/datatypes/project_profile.py |  5 ++++-
 3 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/src/fuzz_introspector/data_loader.py b/src/fuzz_introspector/data_loader.py
index 7c1ae2a41..ad15fc420 100644
--- a/src/fuzz_introspector/data_loader.py
+++ b/src/fuzz_introspector/data_loader.py
@@ -95,7 +95,10 @@ def add_func_to_reached_and_clone(
     # Update hitcount of all functions reached by the function
     for func_name in func_to_add.functions_reached:
         if func_name not in merged_profile.all_functions:
-            logger.error(f"Mismatched function name: {func_name}")
+            if merged_profile_old.profiles[0].target_lang == "jvm":
+                logger.debug(f"{func_name} not provided within classpath")
+            else:
+                logger.error(f"Mismatched function name: {func_name}")
             continue
         f = merged_profile.all_functions[func_name]
         f.hitcount += 1
@@ -110,7 +113,10 @@ def add_func_to_reached_and_clone(
         uncovered_cc = 0
         for reached_func_name in f_profile.functions_reached:
             if reached_func_name not in merged_profile.all_functions:
-                logger.error(f"Mismatched function name: {reached_func_name}")
+                if merged_profile_old.profiles[0].target_lang == "jvm":
+                    logger.debug(f"{reached_func_name} not provided within classpath")
+                else:
+                    logger.error(f"Mismatched function name: {reached_func_name}")
                 continue
             f_reached = merged_profile.all_functions[reached_func_name]
             cc += f_reached.cyclomatic_complexity
diff --git a/src/fuzz_introspector/datatypes/fuzzer_profile.py b/src/fuzz_introspector/datatypes/fuzzer_profile.py
index 0c4fd0000..e03041d60 100644
--- a/src/fuzz_introspector/datatypes/fuzzer_profile.py
+++ b/src/fuzz_introspector/datatypes/fuzzer_profile.py
@@ -368,9 +368,11 @@ def get_cov_metrics(
             total_func_lines, hit_lines = self.coverage.get_hit_summary(funcname)
             if total_func_lines is None or hit_lines is None:
                 return None, None, None
-
-            hit_percentage = (hit_lines / total_func_lines) * 100.0
-            return total_func_lines, hit_lines, hit_percentage
+            if total_func_lines == 0:
+                return 0, 0, 0
+            else:
+                hit_percentage = (hit_lines / total_func_lines) * 100.0
+                return total_func_lines, hit_lines, hit_percentage
         except Exception:
             return None, None, None
 
diff --git a/src/fuzz_introspector/datatypes/project_profile.py b/src/fuzz_introspector/datatypes/project_profile.py
index 1d4a32d68..b2a2e0a74 100644
--- a/src/fuzz_introspector/datatypes/project_profile.py
+++ b/src/fuzz_introspector/datatypes/project_profile.py
@@ -87,7 +87,10 @@ def __init__(self, profiles: List[fuzzer_profile.FuzzerProfile]):
 
             for reached_func_name in fp_obj.functions_reached:
                 if reached_func_name not in self.all_functions:
-                    logger.error(f"Mismatched function name: {reached_func_name}")
+                    if profile.target_lang == "jvm":
+                        logger.debug(f"{reached_func_name} not provided within classpath")
+                    else:
+                        logger.error(f"Mismatched function name: {reached_func_name}")
                     continue
                 reached_func_obj = self.all_functions[reached_func_name]
                 reached_func_obj.incoming_references.append(fp_obj.function_name)

From 58ad832458a69d2d11dd75b609a10f726207a851 Mon Sep 17 00:00:00 2001
From: Arthur Chan <arthur.chan@adalogics.com>
Date: Mon, 21 Nov 2022 16:56:28 +0000
Subject: [PATCH 10/15] Fix type conversion bug and formatting

Signed-off-by: Arthur Chan <arthur.chan@adalogics.com>
---
 src/fuzz_introspector/code_coverage.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/src/fuzz_introspector/code_coverage.py b/src/fuzz_introspector/code_coverage.py
index fed5748bf..2a4962f0d 100644
--- a/src/fuzz_introspector/code_coverage.py
+++ b/src/fuzz_introspector/code_coverage.py
@@ -355,7 +355,6 @@ def correlate_jvm_method_with_coverage(
 
         return
 
-
     def get_hit_summary(
         self,
         funcname: str
@@ -603,15 +602,15 @@ def load_jvm_coverage(
             cov_entry = cl.attrib['name']
             if package.attrib['name']:
                 cov_entry = "%s/%s" % (package.attrib['name'], cov_entry)
-            cov_entry = cov_entry.replace("/",".")
+            cov_entry = cov_entry.replace("/", ".")
             cov_entry = cov_entry.replace(".java", "")
             executed_lines = []
             missing_lines = []
             for line in cl.findall('line'):
                 if line.attrib['ci'] > "0":
-                    executed_lines.append(int(line.attrib['nr']))
+                    executed_lines.append((int(line.attrib['nr']), 1000))
                 else:
-                    missing_lines.append(int(line.attrib['nr']))
+                    missing_lines.append((int(line.attrib['nr']), 0))
 
             cp.file_map[cov_entry] = executed_lines
             cp.dual_file_map[cov_entry] = dict()

From 9a28282cff692805503ce7fa3a322de4a36e980b Mon Sep 17 00:00:00 2001
From: Arthur Chan <arthur.chan@adalogics.com>
Date: Mon, 21 Nov 2022 17:12:08 +0000
Subject: [PATCH 11/15] Fix bugs and code formatting

Signed-off-by: Arthur Chan <arthur.chan@adalogics.com>
---
 src/fuzz_introspector/analysis.py      | 2 +-
 src/fuzz_introspector/code_coverage.py | 8 ++++++--
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/src/fuzz_introspector/analysis.py b/src/fuzz_introspector/analysis.py
index 24624a509..ca933a482 100644
--- a/src/fuzz_introspector/analysis.py
+++ b/src/fuzz_introspector/analysis.py
@@ -185,7 +185,7 @@ def get_node_coverage_hitcount(
 
         if not profile.func_is_entrypoint(demangled_name):
             raise AnalysisError(
-                 "First node in calltree seems to be non-fuzzer function"
+                 "First node in calltree is non-fuzzer function"
             )
         coverage_data = profile.coverage.get_hit_details(demangled_name)
 
diff --git a/src/fuzz_introspector/code_coverage.py b/src/fuzz_introspector/code_coverage.py
index 2a4962f0d..44abae3b8 100644
--- a/src/fuzz_introspector/code_coverage.py
+++ b/src/fuzz_introspector/code_coverage.py
@@ -606,16 +606,20 @@ def load_jvm_coverage(
             cov_entry = cov_entry.replace(".java", "")
             executed_lines = []
             missing_lines = []
+            d_executed_lines = []
+            d_missing_lines = []
             for line in cl.findall('line'):
                 if line.attrib['ci'] > "0":
                     executed_lines.append((int(line.attrib['nr']), 1000))
+                    d_executed_lines.append(int(line.attrib['nr']))
                 else:
                     missing_lines.append((int(line.attrib['nr']), 0))
+                    d_missing_lines.append(int(line.attrib['nr']))
 
             cp.file_map[cov_entry] = executed_lines
             cp.dual_file_map[cov_entry] = dict()
-            cp.dual_file_map[cov_entry]['executed_lines'] = executed_lines
-            cp.dual_file_map[cov_entry]['missing_lines'] = missing_lines
+            cp.dual_file_map[cov_entry]['executed_lines'] = d_executed_lines
+            cp.dual_file_map[cov_entry]['missing_lines'] = d_missing_lines
 
     return cp
 

From 8c8b02cd9e03491136bec02bae180833d7faa9be Mon Sep 17 00:00:00 2001
From: Arthur Chan <arthur.chan@adalogics.com>
Date: Mon, 21 Nov 2022 17:38:22 +0000
Subject: [PATCH 12/15] Fix formatting

Signed-off-by: Arthur Chan <arthur.chan@adalogics.com>
---
 src/fuzz_introspector/analysis.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/fuzz_introspector/analysis.py b/src/fuzz_introspector/analysis.py
index ca933a482..30dcfc705 100644
--- a/src/fuzz_introspector/analysis.py
+++ b/src/fuzz_introspector/analysis.py
@@ -185,7 +185,7 @@ def get_node_coverage_hitcount(
 
         if not profile.func_is_entrypoint(demangled_name):
             raise AnalysisError(
-                 "First node in calltree is non-fuzzer function"
+                "First node in calltree is non-fuzzer function"
             )
         coverage_data = profile.coverage.get_hit_details(demangled_name)
 

From a06cfc2c407d571afaf19f30174a549e5e1e8a80 Mon Sep 17 00:00:00 2001
From: Arthur Chan <arthur.chan@adalogics.com>
Date: Tue, 22 Nov 2022 19:11:19 +0000
Subject: [PATCH 13/15] Add comments and combine redundant code

Signed-off-by: Arthur Chan <arthur.chan@adalogics.com>
---
 src/fuzz_introspector/code_coverage.py | 146 ++++++++++++-------------
 1 file changed, 68 insertions(+), 78 deletions(-)

diff --git a/src/fuzz_introspector/code_coverage.py b/src/fuzz_introspector/code_coverage.py
index 44abae3b8..9bf9b88ce 100644
--- a/src/fuzz_introspector/code_coverage.py
+++ b/src/fuzz_introspector/code_coverage.py
@@ -205,49 +205,22 @@ def _python_ast_funcname_to_cov_file(
 
         return target_key
 
-    def correlate_python_functions_with_coverage(
+    def _retrieve_func_line(
         self,
-        function_list,
-    ) -> None:
-
-        logger.info("Correlating")
-        # For each function identified in the ast identify the file
-        # where it resides in with respect to the filepaths from the
-        # coverage collection. Store this including the linumber
-        # of the function definition.
-        file_and_function_mappings: Dict[str, List[Tuple[str, int]]] = dict()
-        for func_key in function_list:
-            func = function_list[func_key]
-            function_name = func.function_name
-            function_line = func.function_linenumber
-
-            logger.debug(f"Correlated init: {function_name} ---- {function_line}")
-            cov_file = self._python_ast_funcname_to_cov_file(function_name)
-            if cov_file is None:
-                continue
-
-            # Return False if file is not in file_map
-            if cov_file not in self.file_map:
-                logger.debug("Target key is not in file_map")
-                continue
-
-            if cov_file not in file_and_function_mappings:
-                file_and_function_mappings[cov_file] = []
-
-            file_and_function_mappings[cov_file].append(
-                (function_name, function_line)
-            )
-
+        file_and_function_mappings,
+    ) -> Dict[str, List[Tuple[str, int, int]]]:
         # Sort function and lines numbers for each coverage file.
         # Store in function_internals.
-        logger.debug("Function intervals")
+        logger.debug("Geting function start and end line")
         function_internals: Dict[str, List[Tuple[str, int, int]]] = dict()
         for cov_file, function_specs in file_and_function_mappings.items():
+            # Sort by line number
             sorted_func_specs = list(sorted(function_specs, key=lambda x: x[1]))
 
             function_internals[cov_file] = []
             for i in range(len(sorted_func_specs)):
                 fname, fstart = sorted_func_specs[i]
+
                 # Get next function lineno to identify boundary
                 if i < len(sorted_func_specs) - 1:
                     fnext_name, fnext_start = sorted_func_specs[i + 1]
@@ -258,14 +231,19 @@ def correlate_python_functions_with_coverage(
                     # Last function identified by end lineno being -1
                     function_internals[cov_file].append((fname, fstart, -1))
 
-        # Map the source codes of each line with coverage information.
-        # Store the result in covmap to be compatible with other languages.
+        return function_internals
+
+    def _map_func_covmap(
+        self,
+        function_internals,
+    ) -> None:
         for filename in function_internals:
             logger.debug(f"Filename: {filename}")
             for fname, fstart, fend in function_internals[filename]:
                 logger.debug(f"--- {fname} ::: {fstart} ::: {fend}")
 
                 if fname not in self.covmap:
+                    # Fail safe
                     self.covmap[fname] = []
 
                 # If we have the file in dual_file_map identify the
@@ -275,13 +253,55 @@ def correlate_python_functions_with_coverage(
 
                 # Create the covmap
                 for exec_line in self.dual_file_map[filename]['executed_lines']:
-                    if exec_line > fstart and exec_line < fend:
+                    if (exec_line > fstart) and (exec_line < fend or fend == -1):
                         logger.debug(f"E: {exec_line}")
                         self.covmap[fname].append((exec_line, 1000))
                 for non_exec_line in self.dual_file_map[filename]['missing_lines']:
-                    if non_exec_line > fstart and non_exec_line < fend:
+                    if (non_exec_line > fstart) and (non_exec_line < fend or fend == -1):
                         logger.debug(f"N: {non_exec_line}")
                         self.covmap[fname].append((non_exec_line, 0))
+
+
+    def correlate_python_functions_with_coverage(
+        self,
+        function_list,
+    ) -> None:
+
+        logger.info("Correlating")
+        # For each function identified in the AST, identify the file
+        # in which it resides with respect to the filepaths from the
+        # coverage collection. Store this together with the line number
+        # of the function definition.
+        file_and_function_mappings: Dict[str, List[Tuple[str, int]]] = dict()
+        for func_key in function_list:
+            func = function_list[func_key]
+            function_name = func.function_name
+            function_line = func.function_linenumber
+
+            logger.debug(f"Correlated init: {function_name} ---- {function_line}")
+            cov_file = self._python_ast_funcname_to_cov_file(function_name)
+            if cov_file is None:
+                continue
+
+            # Skip this function if its file is not in file_map
+            if cov_file not in self.file_map:
+                logger.debug("Target key is not in file_map")
+                continue
+
+            if cov_file not in file_and_function_mappings:
+                file_and_function_mappings[cov_file] = []
+
+            file_and_function_mappings[cov_file].append(
+                (function_name, function_line)
+            )
+
+        # Sort and retrieve line range of all functions
+        function_internals = self.retrieve_func_line(file_and_function_mappings)
+
+        # Map the source codes of each line with coverage information.
+        # Store the result in covmap to be compatible with other languages.
+        self._map_func_covmap(function_internals)
+
         return
 
     def correlate_jvm_method_with_coverage(
@@ -308,50 +328,12 @@ def correlate_jvm_method_with_coverage(
                 (function_name, function_line)
             )
 
-        logger.debug("Geting function start and end line")
-        function_internals: Dict[str, List[Tuple[str, int, int]]] = dict()
-        for cov_file, function_specs in file_and_function_mappings.items():
-            # Sort by line number
-            sorted_func_specs = list(sorted(function_specs, key=lambda x: x[1]))
-
-            function_internals[cov_file] = []
-            for i in range(len(sorted_func_specs)):
-                fname, fstart = sorted_func_specs[i]
-
-                # Get next function lineno to identify boundary
-                if i < len(sorted_func_specs) - 1:
-                    fnext_name, fnext_start = sorted_func_specs[i + 1]
-                    function_internals[cov_file].append(
-                        (fname, fstart, fnext_start - 1)
-                    )
-                else:
-                    # Last function identified by end lineno being -1
-                    function_internals[cov_file].append((fname, fstart, -1))
+        # Sort and retrieve line range of all functions
+        function_internals = self.retrieve_func_line(file_and_function_mappings)
 
         # Map the source codes of each line with coverage information.
         # Store the result in covmap to be compatible with other languages.
-        for filename in function_internals:
-            logger.debug(f"Filename: {filename}")
-            for fname, fstart, fend in function_internals[filename]:
-                logger.debug(f"--- {fname} ::: {fstart} ::: {fend}")
-                if fname not in self.covmap:
-                    # Fail safe
-                    self.covmap[fname] = []
-
-                # If we have the file in dual_file_map identify the
-                # executed vs non-executed lines and store in covmap.
-                if filename not in self.dual_file_map:
-                    continue
-
-                # Create the covmap
-                for exec_line in self.dual_file_map[filename]['executed_lines']:
-                    if (exec_line > fstart) and (exec_line < fend or fend == -1):
-                        logger.debug(f"E: {exec_line}")
-                        self.covmap[fname].append((exec_line, 1000))
-                for non_exec_line in self.dual_file_map[filename]['missing_lines']:
-                    if (non_exec_line > fstart) and (non_exec_line < fend or fend == -1):
-                        logger.debug(f"N: {non_exec_line}")
-                        self.covmap[fname].append((non_exec_line, 0))
+        self._map_func_covmap(function_internals)
 
         return
 
@@ -580,6 +562,14 @@ def load_jvm_coverage(
     target_dir: str,
     target_name: Optional[str] = None
 ) -> CoverageProfile:
+    """Find and load jacoco.xml, a JVM XML coverage report file.
+
+    The XML file is generated by the JaCoCo plugin. The DTD describing the
+    report format can be found at the following link:
+    - https://www.jacoco.org/jacoco/trunk/coverage/report.dtd
+
+    Returns a CoverageProfile.
+    """
     import xml.etree.ElementTree as ET
     cp = CoverageProfile()
     cp.set_type("file")

From 6e241db77acbb28b12262dfd8dd9ebc41bae8eac Mon Sep 17 00:00:00 2001
From: Arthur Chan <arthur.chan@adalogics.com>
Date: Tue, 22 Nov 2022 19:22:22 +0000
Subject: [PATCH 14/15] Fix bug: call _retrieve_func_line by its actual name

Signed-off-by: Arthur Chan <arthur.chan@adalogics.com>
---
 src/fuzz_introspector/code_coverage.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/fuzz_introspector/code_coverage.py b/src/fuzz_introspector/code_coverage.py
index d3b5e9df3..61db84d63 100644
--- a/src/fuzz_introspector/code_coverage.py
+++ b/src/fuzz_introspector/code_coverage.py
@@ -296,7 +296,7 @@ def correlate_python_functions_with_coverage(
             )
 
         # Sort and retrieve line range of all functions
-        function_internals = self.retrieve_func_line(file_and_function_mappings)
+        function_internals = self._retrieve_func_line(file_and_function_mappings)
 
         # Map the source codes of each line with coverage information.
         # Store the result in covmap to be compatible with other languages.

From 70a1bf4d0804b5d1deee351815ce6035b3c443bf Mon Sep 17 00:00:00 2001
From: Arthur Chan <arthur.chan@adalogics.com>
Date: Tue, 22 Nov 2022 19:23:40 +0000
Subject: [PATCH 15/15] Fix formatting

Signed-off-by: Arthur Chan <arthur.chan@adalogics.com>
---
 src/fuzz_introspector/code_coverage.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/fuzz_introspector/code_coverage.py b/src/fuzz_introspector/code_coverage.py
index 61db84d63..ca40feab6 100644
--- a/src/fuzz_introspector/code_coverage.py
+++ b/src/fuzz_introspector/code_coverage.py
@@ -261,7 +261,6 @@ def _map_func_covmap(
                         logger.debug(f"N: {non_exec_line}")
                         self.covmap[fname].append((non_exec_line, 0))
 
-
     def correlate_python_functions_with_coverage(
         self,
         function_list,