Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Java-Integration]: Coverage report #612

Merged
merged 16 commits into from
Nov 22, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 6 additions & 8 deletions src/fuzz_introspector/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,14 +183,12 @@ def get_node_coverage_hitcount(
# As this is the first node ensure it is indeed the entrypoint.
# The difference is this node has no "parent" or prior nodes.

# Arthur: Not true anymore for cpp or jvm with multiple yaml file
# TODO Relaxing this constraint temporary, fixed later
# if not profile.func_is_entrypoint(demangled_name):
# raise AnalysisError(
# "First node in calltree seems to be non-fuzzer function"
# )

if not profile.func_is_entrypoint(demangled_name):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This one we should watch out for as I'm not sure the condition will always be true. I'm thinking with regards to Python here.

raise AnalysisError(
"First node in calltree is non-fuzzer function"
)
coverage_data = profile.coverage.get_hit_details(demangled_name)

if len(coverage_data) == 0:
logger.error("There is no coverage data (not even all negative).")
node.cov_parent = "EP"
Expand Down Expand Up @@ -224,7 +222,6 @@ def get_node_coverage_hitcount(
if ih:
node_hitcount = 200
elif profile.target_lang == "jvm":
# TODO Handle for jvm added later
coverage_data = profile.coverage.get_hit_details(
callstack_get_parent(node, callstack)
)
Expand All @@ -238,6 +235,7 @@ def get_node_coverage_hitcount(
raise AnalysisError(
"A node should either be the first or it must have a parent"
)

return node_hitcount


Expand Down
184 changes: 147 additions & 37 deletions src/fuzz_introspector/code_coverage.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,7 @@ def get_hit_details(self, funcname: str) -> List[Tuple[int, int]]:

if fuzz_key is None or fuzz_key not in self.covmap:
return []

return self.covmap[fuzz_key]

def _python_ast_funcname_to_cov_file(
Expand Down Expand Up @@ -204,49 +205,22 @@ def _python_ast_funcname_to_cov_file(

return target_key

def correlate_python_functions_with_coverage(
def _retrieve_func_line(
self,
function_list,
) -> None:

logger.info("Correlating")
# For each function identified in the ast identify the file
# where it resides in with respect to the filepaths from the
# coverage collection. Store this including the linumber
# of the function definition.
file_and_function_mappings: Dict[str, List[Tuple[str, int]]] = dict()
for func_key in function_list:
func = function_list[func_key]
function_name = func.function_name
function_line = func.function_linenumber

logger.debug(f"Correlated init: {function_name} ---- {function_line}")
cov_file = self._python_ast_funcname_to_cov_file(function_name)
if cov_file is None:
continue

# Return False if file is not in file_map
if cov_file not in self.file_map:
logger.debug("Target key is not in file_map")
continue

if cov_file not in file_and_function_mappings:
file_and_function_mappings[cov_file] = []

file_and_function_mappings[cov_file].append(
(function_name, function_line)
)

file_and_function_mappings,
) -> Dict[str, List[Tuple[str, int, int]]]:
# Sort function and lines numbers for each coverage file.
# Store in function_internals.
logger.debug("Function intervals")
logger.debug("Geting function start and end line")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: typo

function_internals: Dict[str, List[Tuple[str, int, int]]] = dict()
for cov_file, function_specs in file_and_function_mappings.items():
# Sort by line number
sorted_func_specs = list(sorted(function_specs, key=lambda x: x[1]))

function_internals[cov_file] = []
for i in range(len(sorted_func_specs)):
fname, fstart = sorted_func_specs[i]

# Get next function lineno to identify boundary
if i < len(sorted_func_specs) - 1:
fnext_name, fnext_start = sorted_func_specs[i + 1]
Expand All @@ -257,13 +231,19 @@ def correlate_python_functions_with_coverage(
# Last function identified by end lineno being -1
function_internals[cov_file].append((fname, fstart, -1))

# Map the source codes of each line with coverage information.
# Store the result in covmap to be compatible with other languages.
return function_internals

def _map_func_covmap(
self,
function_internals,
) -> None:
for filename in function_internals:
logger.debug(f"Filename: {filename}")
for fname, fstart, fend in function_internals[filename]:
logger.debug(f"--- {fname} ::: {fstart} ::: {fend}")

if fname not in self.covmap:
# Fail safe
self.covmap[fname] = []

# If we have the file in dual_file_map identify the
Expand All @@ -273,13 +253,87 @@ def correlate_python_functions_with_coverage(

# Create the covmap
for exec_line in self.dual_file_map[filename]['executed_lines']:
if exec_line > fstart and (exec_line < fend or fend == -1):
if (exec_line > fstart) and (exec_line < fend or fend == -1):
logger.debug(f"E: {exec_line}")
self.covmap[fname].append((exec_line, 1000))
for non_exec_line in self.dual_file_map[filename]['missing_lines']:
if non_exec_line > fstart and (non_exec_line < fend or fend == -1):
if (non_exec_line > fstart) and (non_exec_line < fend or fend == -1):
logger.debug(f"N: {non_exec_line}")
self.covmap[fname].append((non_exec_line, 0))

def correlate_python_functions_with_coverage(
    self,
    function_list,
) -> None:
    """Correlate Python functions with the collected coverage data.

    Groups every function in ``function_list`` by the coverage file that
    ``_python_ast_funcname_to_cov_file`` resolves for it, then passes the
    grouping to ``_retrieve_func_line`` and the result of that call to
    ``_map_func_covmap``.

    Args:
        function_list: Mapping whose values expose ``function_name`` and
            ``function_linenumber``.
    """
    logger.info("Correlating")

    # Coverage file -> (function name, definition line number) for every
    # function found in that file.
    funcs_by_cov_file: Dict[str, List[Tuple[str, int]]] = dict()
    for func in function_list.values():
        function_name = func.function_name
        function_line = func.function_linenumber
        logger.debug(f"Correlated init: {function_name} ---- {function_line}")

        cov_file = self._python_ast_funcname_to_cov_file(function_name)
        if cov_file is None:
            continue

        # Skip functions whose file has no entry in the coverage file map.
        if cov_file not in self.file_map:
            logger.debug("Target key is not in file_map")
            continue

        funcs_by_cov_file.setdefault(cov_file, []).append(
            (function_name, function_line)
        )

    # Sort the functions and derive the line range of each one.
    line_ranges = self._retrieve_func_line(funcs_by_cov_file)

    # Map each function's lines to coverage information; the result is kept
    # in covmap to stay compatible with the other languages.
    self._map_func_covmap(line_ranges)

def correlate_jvm_method_with_coverage(
    self,
    function_list,
) -> None:
    """Correlate JVM methods with the collected coverage data.

    Groups every method in ``function_list`` by its ``function_source_file``
    (used here as the class name and as the key into ``file_map``), then
    passes the grouping to ``_retrieve_func_line`` and the result of that
    call to ``_map_func_covmap``.

    Args:
        function_list: Mapping whose values expose ``function_name``,
            ``function_linenumber`` and ``function_source_file``.
    """
    logger.debug("Correlating JVM")

    # Class name -> (method name, definition line number) for every method
    # that belongs to a class present in the coverage file map.
    methods_by_class: Dict[str, List[Tuple[str, int]]] = dict()
    for func in function_list.values():
        function_name = func.function_name
        function_line = func.function_linenumber
        class_name = func.function_source_file
        logger.debug(f"Correlated init: {class_name} ---- {function_name} ---- {function_line}")

        if class_name not in self.file_map:
            logger.debug("Fail to find matching class")
            continue

        methods_by_class.setdefault(class_name, []).append(
            (function_name, function_line)
        )

    # Sort the methods and derive the line range of each one.
    line_ranges = self._retrieve_func_line(methods_by_class)

    # Map each method's lines to coverage information; the result is kept
    # in covmap to stay compatible with the other languages.
    self._map_func_covmap(line_ranges)

def get_hit_summary(
Expand Down Expand Up @@ -503,6 +557,62 @@ def load_python_json_coverage(
return cp


def load_jvm_coverage(
    target_dir: str,
    target_name: Optional[str] = None
) -> CoverageProfile:
    """Find and load jacoco.xml, a JVM XML coverage report file.

    The XML file is generated by the JaCoCo plugin. Its DTD can be found at:
    - https://www.jacoco.org/jacoco/trunk/coverage/report.dtd

    Only the first report found under ``target_dir`` is loaded. For every
    ``sourcefile`` element the fully qualified class name (package path with
    "/" turned into "." and the ".java" suffix removed) is used as the key
    into ``file_map`` and ``dual_file_map``.

    Args:
        target_dir: Directory tree searched for ``jacoco.xml``.
        target_name: Currently unused; kept for interface compatibility.

    Returns:
        A CoverageProfile of type "file". It carries no coverage entries when
        no report is found.
    """
    import xml.etree.ElementTree as ET

    java_suffix = ".java"

    cp = CoverageProfile()
    cp.set_type("file")

    coverage_reports = utils.get_all_files_in_tree_with_regex(target_dir, "jacoco.xml")
    logger.info(f"FOUND XML COVERAGE FILES: {str(coverage_reports)}")

    if len(coverage_reports) == 0:
        logger.info("Found no coverage files")
        return cp

    xml_file = coverage_reports[0]
    cp.coverage_files.append(xml_file)
    root = ET.parse(xml_file).getroot()

    for package in root.findall('package'):
        for sourcefile in package.findall('sourcefile'):
            # Strip the extension from the file name only. A blanket
            # replace(".java", "") would also mangle packages containing
            # ".java", e.g. "org/javassist" -> "orgssist".
            class_name = sourcefile.attrib['name']
            if class_name.endswith(java_suffix):
                class_name = class_name[:-len(java_suffix)]

            package_name = package.attrib['name']
            if package_name:
                cov_entry = "%s/%s" % (package_name, class_name)
            else:
                cov_entry = class_name
            cov_entry = cov_entry.replace("/", ".")

            executed_lines = []
            d_executed_lines = []
            d_missing_lines = []
            for line in sourcefile.findall('line'):
                line_no = int(line.attrib['nr'])
                # "ci" is the covered-instruction count; compare it as a
                # number rather than lexicographically as a string.
                if int(line.attrib['ci']) > 0:
                    executed_lines.append((line_no, 1000))
                    d_executed_lines.append(line_no)
                else:
                    d_missing_lines.append(line_no)

            cp.file_map[cov_entry] = executed_lines
            cp.dual_file_map[cov_entry] = {
                'executed_lines': d_executed_lines,
                'missing_lines': d_missing_lines,
            }

    return cp


if __name__ == "__main__":
logging.basicConfig()
logger.info("Starting coverage loader")
Expand Down
10 changes: 8 additions & 2 deletions src/fuzz_introspector/data_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,10 @@ def add_func_to_reached_and_clone(
# Update hitcount of all functions reached by the function
for func_name in func_to_add.functions_reached:
if func_name not in merged_profile.all_functions:
logger.error(f"Mismatched function name: {func_name}")
if merged_profile_old.profiles[0].target_lang == "jvm":
logger.debug(f"{func_name} not provided within classpath")
else:
logger.error(f"Mismatched function name: {func_name}")
continue
f = merged_profile.all_functions[func_name]
f.hitcount += 1
Expand All @@ -110,7 +113,10 @@ def add_func_to_reached_and_clone(
uncovered_cc = 0
for reached_func_name in f_profile.functions_reached:
if reached_func_name not in merged_profile.all_functions:
logger.error(f"Mismatched function name: {reached_func_name}")
if merged_profile_old.profiles[0].target_lang == "jvm":
logger.debug(f"{reached_func_name} not provided within classpath")
else:
logger.error(f"Mismatched function name: {reached_func_name}")
continue
f_reached = merged_profile.all_functions[reached_func_name]
cc += f_reached.cyclomatic_complexity
Expand Down
45 changes: 36 additions & 9 deletions src/fuzz_introspector/datatypes/fuzzer_profile.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,8 +101,8 @@ def identifier(self):
return os.path.basename(self.fuzzer_source_file).replace(".py", "")

elif self._target_lang == "jvm":
# TODO Handle jvm fuzzer source file
pass
# Class name is used for jvm identifier
return os.path.basename(self.fuzzer_source_file)

return self.fuzzer_source_file

Expand Down Expand Up @@ -150,8 +150,12 @@ def resolve_coverage_link(
function_name
)
elif self.target_lang == "jvm":
# TODO Add coverage report for JVM
pass
return self._resolve_jvm_coverage_link(
cov_url,
source_file,
lineno,
function_name
)
else:
logger.info("Could not find any html_status.json file")
return "#"
Expand Down Expand Up @@ -364,9 +368,11 @@ def get_cov_metrics(
total_func_lines, hit_lines = self.coverage.get_hit_summary(funcname)
if total_func_lines is None or hit_lines is None:
return None, None, None

hit_percentage = (hit_lines / total_func_lines) * 100.0
return total_func_lines, hit_lines, hit_percentage
if total_func_lines == 0:
return 0, 0, 0
else:
hit_percentage = (hit_lines / total_func_lines) * 100.0
return total_func_lines, hit_lines, hit_percentage
except Exception:
return None, None, None

Expand Down Expand Up @@ -440,11 +446,14 @@ def _load_coverage(self, target_folder: str) -> None:
self.all_class_functions
)
elif self.target_lang == "jvm":
# TODO Add JVM coverage loading support
self.coverage = code_coverage.load_llvm_coverage(
self.coverage = code_coverage.load_jvm_coverage(
target_folder,
self.identifier
)
if self.coverage is not None:
self.coverage.correlate_jvm_method_with_coverage(
self.all_class_functions
)
else:
raise DataLoaderError(
"The profile target has no coverage loading support"
Expand Down Expand Up @@ -556,3 +565,21 @@ def _resolve_python_coverage_link(
else:
logger.info("Could not find any html_status.json file")
return "#"

def _resolve_jvm_coverage_link(
    self,
    cov_url: str,
    source_file: str,
    lineno: int,
    function_name: str
) -> str:
    """Resolve the link to the HTML coverage report for JVM targets.

    Args:
        cov_url: Base URL of the coverage report.
        source_file: Source class name, optionally package-qualified with
            "." separators.
        lineno: Line number used as the ``#L`` anchor.
        function_name: Unused; kept for a signature parallel to the other
            coverage-link resolvers.

    Returns:
        ``<cov_url>/<class path>.html#L<lineno>``. The result is a URL, so
        "/" is always the separator, never the platform's ``os.sep``.
    """
    if "." in source_file:
        # Source file has a package: turn every "." into a URL separator.
        source_path = source_file.replace(".", "/")
    else:
        # Source file has no package: place it under the "default" package.
        source_path = "default/" + source_file

    return cov_url + "/" + source_path + ".html#L" + str(lineno)
Loading