From b237a4333d74690151737986266e205ac8ddd34c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20K=C3=B6sel?= Date: Mon, 11 Jun 2018 18:51:00 +0200 Subject: [PATCH 01/48] Convert most Bash scripts to Python --- scripts/retdec_archive_decompiler.py | 202 +++++++++ scripts/retdec_config.py | 78 ++++ scripts/retdec_fileinfo.py | 70 ++++ .../retdec_signature_from_library_creator.py | 179 ++++++++ scripts/retdec_unpacker.py | 214 ++++++++++ scripts/retdec_utils.py | 393 ++++++++++++++++++ 6 files changed, 1136 insertions(+) create mode 100644 scripts/retdec_archive_decompiler.py create mode 100644 scripts/retdec_config.py create mode 100644 scripts/retdec_fileinfo.py create mode 100644 scripts/retdec_signature_from_library_creator.py create mode 100644 scripts/retdec_unpacker.py create mode 100644 scripts/retdec_utils.py diff --git a/scripts/retdec_archive_decompiler.py b/scripts/retdec_archive_decompiler.py new file mode 100644 index 000000000..0beeba1e7 --- /dev/null +++ b/scripts/retdec_archive_decompiler.py @@ -0,0 +1,202 @@ +#! /usr/bin/env python3 + +import argparse +import os +import re +import subprocess +import sys + +import retdec_config as config +from retdec_utils import Utils +from retdec_utils import CmdRunner + + +def parse_args(): + parser = argparse.ArgumentParser(description='Runs the decompilation script with the given optional arguments over' + ' all files in the given static library or prints list of files in' + ' plain text with --plain argument or in JSON format with' + ' --json argument. You can pass arguments for decompilation after' + ' double-dash -- argument.', + formatter_class=argparse.ArgumentDefaultsHelpFormatter) + + parser.add_argument("--plain", + dest="plain_format", + help="print list of files in plain text") + + parser.add_argument("--json", + dest="json_format", + help="print list of files in json format") + + parser.add_argument("--list", + dest="list_mode", + help="list") + + parser.add_argument("--", + nargs='+', + dest="arg_list", + help="args passed to the decompiler") + + parser.add_argument("file", + help="path") + + return parser.parse_args() + + +class ArchiveDecompiler: + def __init__(self, _args): + self.args = _args + + self.decompiler_sh_args = '' + self.timeout = 300 + self.tmp_archive = '' + self.use_json_format = False + self.use_plain_format = False + self.enable_list_mode = False + self.library_path = '' + self.file_count = 0 + + def _print_error_plain_or_json(self, error): + """Prints error in either plain text or JSON format. + One argument required: error message. + """ + if self.use_json_format: + message = re.escape(error) + print('{') + print(' \'error\' : \'' + message + '\'') + print('}') + # exit(1) + else: + # Otherwise print in plain text. + Utils.print_error_and_die(error) + + def _cleanup(self): + """Cleans up all temporary files. + No arguments accepted. 
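+        Currently this removes only the temporary archive extracted from a
+        Mach-O Universal Binary, if one was created.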
+ """ + if os.path.exists(self.tmp_archive): + Utils.remove_forced(self.tmp_archive) + + def _check_arguments(self): + + if self.args.list_mode: + self.enable_list_mode = True + + if self.args.plain_format: + if self.use_json_format: + Utils.print_error_and_die('Arguments --plain and --json are mutually exclusive.') + return False + else: + self.enable_list_mode = True + self.use_plain_format = True + + if self.args.json_format: + if self.args.args.plain_format: + Utils.print_error_and_die('Arguments --plain and --json are mutually exclusive.') + return False + else: + self.enable_list_mode = True + self.use_json_format = True + + if self.args.arg_list: + self.decompiler_sh_args = ' '.join(self.args.arg_list) + + if self.args.file: + if not (os.path.isfile(self.args.file)): + Utils.print_error_and_die('Input %s is not a valid file.' % self.args.file) + return False + + self.library_path = self.args.file + + if self.library_path == '': + self._print_error_plain_or_json('No input file.') + return False + + return True + + def decompile_archive(self): + + # Check arguments + if not self._check_arguments(): + return + + # Check for archives packed in Mach-O Universal Binaries. + if Utils.is_macho_archive(self.library_path): + if self.enable_list_mode: + if self.use_json_format: + subprocess.call([config.EXTRACT, '--objects', '--json', self.library_path], shell=True) + else: + subprocess.call([config.EXTRACT, '--objects', self.library_path], shell=True) + # sys.exit(1) + + self.tmp_archive = self.library_path + '.a' + subprocess.call([config.EXTRACT, '--best', '--out', self.tmp_archive, self.library_path], shell=True) + self.library_path = self.tmp_archive + + # Check for thin archives. + if Utils.has_thin_archive_signature(self.library_path) == 0: + self._print_error_plain_or_json('File is a thin archive and cannot be decompiled.') + return + + # Check if file is archive + if not Utils.is_valid_archive(self.library_path): + self._print_error_plain_or_json('File is not supported archive or is not readable.') + return + + # Check number of files. + self.file_count = Utils.archive_object_count(self.library_path) + + if self.file_count <= 0: + self._print_error_plain_or_json('No files found in archive.') + return + + # List only mode. + if self.enable_list_mode: + if self.use_json_format: + Utils.archive_list_numbered_content_json(self.library_path) + else: + Utils.archive_list_numbered_content(self.library_path) + + self._cleanup() + # sys.exit(0) + + # Run the decompilation script over all the found files. + print('Running \`%s' % config.DECOMPILER_SH, end='') + + if self.decompiler_sh_args != '': + print(self.decompiler_sh_args, end='') + + print('\` over %d files with timeout %d s. (run \`kill %d \` to terminate this script)...' % ( + self.file_count, self.timeout, os.getpid()), file=sys.stderr) + + cmd = CmdRunner() + for i in range(self.file_count): + file_index = (i + 1) + print('%d/%d\t\t' % (file_index, self.file_count)) + + # We have to use indexes instead of names because archives can contain multiple files with same name. + log_file = self.library_path + '.file_' + str(file_index) + '.log.verbose' + + # Do not escape! 
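+            # Decompile the object selected by its zero-based --ar-index and
+            # store the combined output of the run in the per-file log.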
+ output, _, timeouted = cmd.run_cmd([config.DECOMPILER_SH, '--ar-index=' + str(i), '-o', + self.library_path + '.file_' + str(file_index) + '.c', + self.library_path, self.decompiler_sh_args], timeout=self.timeout) + + with open(log_file, 'wb') as f: + f.write(output) + + if timeouted: + print('[TIMEOUT]') + else: + print('[OK]') + + self._cleanup() + # sys.exit(0) + + +if __name__ == '__main__': + args = parse_args() + + archive_decompiler = ArchiveDecompiler(args) + archive_decompiler.decompile_archive() + + sys.exit(0) diff --git a/scripts/retdec_config.py b/scripts/retdec_config.py new file mode 100644 index 000000000..6861bf867 --- /dev/null +++ b/scripts/retdec_config.py @@ -0,0 +1,78 @@ +#!/usr/bin/env python3 + +import os + +SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__)) + +"""Paths (everything has to be without the ending slash '/'). + +Paths relative from script path. +""" +INSTALL_BIN_DIR = SCRIPT_DIR +UNIT_TESTS_DIR = INSTALL_BIN_DIR +INSTALL_SHARE_DIR = INSTALL_BIN_DIR + '/../share/retdec' +INSTALL_SUPPORT_DIR = INSTALL_SHARE_DIR + '/support' +INSTALL_SHARE_YARA_DIR = INSTALL_SUPPORT_DIR + '/generic/yara_patterns' +# generic configuration +GENERIC_TYPES_DIR = INSTALL_SUPPORT_DIR + '/generic/types' +GENERIC_SIGNATURES_DIR = INSTALL_SHARE_YARA_DIR + '/static-code' +# ARM-specific configuration +ARM_ORDS_DIR = INSTALL_SUPPORT_DIR + '/arm/ords' +# X86-specific configuration +X86_ORDS_DIR = INSTALL_SUPPORT_DIR + '/x86/ords' + +"""BIN2LLVMIR parameters +The following list of passes is -O3 + * with -disable-inlining -disable-simplify-libcalls -constprop -die -dce -ipconstprop -instnamer + * without -internalize -inline -inline-cost -notti -deadargelim -argpromotion -simplify-libcalls -loop-unroll + -loop-unswitch -sroa -tailcallelim -functionattrs -memcpyopt -prune-eh + +The following options are useful during debugging of bin2llvmirl optimizations. +parameters beginning with -disable-* may be included only once, which is the + * -print-after-all -debug-only=idioms -print-before=idioms -print-after=idioms + + -unreachable-funcs is automatically removed in decompilation script when the + -k/--keep-unreachable-funcs parameter is used. + + - We need to run -instcombine after -dead-global-assign to eliminate dead + instructions after this optimization. + + - Optimization -phi2seq is needed to be run at the end and not to run two + times. This is the reason why it is placed at the very end. 
+""" +BIN2LLVMIR_PARAMS_DISABLES = '-disable-inlining -disable-simplify-libcalls' +BIN2LLVMIR_LLVM_PASSES_ONLY = '-instcombine -tbaa -targetlibinfo -basicaa -domtree -simplifycfg -domtree -early-cse' \ + ' -lower-expect -targetlibinfo -tbaa -basicaa -globalopt -mem2reg -instcombine' \ + ' -simplifycfg -basiccg -domtree -early-cse -lazy-value-info -jump-threading' \ + ' -correlated-propagation -simplifycfg -instcombine -simplifycfg -reassociate -domtree' \ + ' -loops -loop-simplify -lcssa -loop-rotate -licm -lcssa -instcombine -scalar-evolution' \ + ' -loop-simplifycfg -loop-simplify -aa -loop-accesses -loop-load-elim -lcssa -indvars' \ + ' -loop-idiom -loop-deletion -memdep -gvn -memdep -sccp -instcombine -lazy-value-info' \ + ' -jump-threading -correlated-propagation -domtree -memdep -dse -dce -bdce -adce -die' \ + ' -simplifycfg -instcombine -strip-dead-prototypes -globaldce -constmerge -constprop' \ + ' -instnamer -domtree -instcombine' + +BIN2LLVMIR_PARAMS = '-provider-init -decoder -verify -main-detection -idioms-libgcc -inst-opt -register' \ + ' -cond-branch-opt -syscalls -stack -constants -param-return -local-vars -inst-opt -simple-types' \ + ' -generate-dsm -remove-asm-instrs -class-hierarchy -select-fncs -unreachable-funcs -inst-opt' \ + ' -value-protect $BIN2LLVMIR_LLVM_PASSES_ONLY $BIN2LLVMIR_LLVM_PASSES_ONLY -simple-types' \ + ' -stack-ptr-op-remove -inst-opt -idioms -global-to-local -dead-global-assign -instcombine' \ + ' -phi2seq -value-protect $BIN2LLVMIR_PARAMS_DISABLES' + +# Paths to tools. +FILEINFO = INSTALL_BIN_DIR + '/retdec-fileinfo' +FILEINFO_EXTERNAL_YARA_PRIMARY_CRYPTO_DATABASES = [INSTALL_SHARE_YARA_DIR + '/signsrch/signsrch.yara'] +FILEINFO_EXTERNAL_YARA_EXTRA_CRYPTO_DATABASES = [INSTALL_SHARE_YARA_DIR + '/signsrch/signsrch_regex.yara'] +AR = INSTALL_BIN_DIR + '/retdec-ar-extractor' +BIN2PAT = INSTALL_BIN_DIR + '/retdec-bin2pat' +PAT2YARA = INSTALL_BIN_DIR + '/retdec-pat2yara' +CONFIGTOOL = INSTALL_BIN_DIR + '/retdec-config' +EXTRACT = INSTALL_BIN_DIR + '/retdec-macho-extractor' +DECOMPILER_SH = INSTALL_BIN_DIR + '/retdec-decompiler.sh' +ARCHIVE_DECOMPILER_PY = INSTALL_BIN_DIR + '/retdec_archive_decompiler.py' +SIG_FROM_LIB_PY = INSTALL_BIN_DIR + '/retdec_signature_from_library_creator.py' +UNPACK_PY = INSTALL_BIN_DIR + '/retdec_unpacker.py' +LLVMIR2HLL = INSTALL_BIN_DIR + '/retdec-llvmir2hll' +BIN2LLVMIR = INSTALL_BIN_DIR + '/retdec-bin2llvmir' +IDA_COLORIZER = INSTALL_BIN_DIR + '/retdec-color-c.py' +UNPACKER = INSTALL_BIN_DIR + '/retdec-unpacker' diff --git a/scripts/retdec_fileinfo.py b/scripts/retdec_fileinfo.py new file mode 100644 index 000000000..754b37de5 --- /dev/null +++ b/scripts/retdec_fileinfo.py @@ -0,0 +1,70 @@ +#! /usr/bin/env python3 + +"""When analyzing an archive, use the archive decompilation script `--list` instead of +`fileinfo` because fileinfo is currently unable to analyze archives. + +First, we have to find path to the input file. We take the first parameter +that does not start with a dash. This is a simplification and may not work in +all cases. A proper solution would need to parse fileinfo parameters, which +would be complex. 
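+In this Python version, the input file is taken from the positional FILE
+argument parsed by argparse below.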
+""" + +import argparse +import subprocess +import sys + +import retdec_config as config +from retdec_utils import Utils +from retdec_archive_decompiler import ArchiveDecompiler + + +def parse_args(): + parser = argparse.ArgumentParser( + description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter + ) + + parser.add_argument('-j', '--json', + dest='json', + action='store_true', + help='Set to forward --json to the archive decompilation script.') + + parser.add_argument('--use-external-patterns', + dest='external_patterns', + action='store_true', + help='Should use external patterns') + + parser.add_argument('file', + metavar='FILE', + help='File to analyze.') + + return parser.parse_args() + + +if __name__ == '__main__': + args = parse_args() + + if Utils.has_archive_signature(args.file): + # The input file is not an archive. + + # The input file is an archive, so use the archive decompilation script + # instead of fileinfo. + archive_decompiler_args = [args.file, '--list'] + + if args.json: + archive_decompiler_args.append('--json') + + res = ArchiveDecompiler(archive_decompiler_args).decompile_archive() + sys.exit(res) + + # We are not analyzing an archive, so proceed to fileinfo. + fileinfo_params = [args.file] + + for par in config.FILEINFO_EXTERNAL_YARA_PRIMARY_CRYPTO_DATABASES: + fileinfo_params.extend(['--crypto', par]) + + if args.external_patterns: + for par in config.FILEINFO_EXTERNAL_YARA_EXTRA_CRYPTO_DATABASES: + fileinfo_params.extend(['--crypto', par]) + + subprocess.call([config.FILEINFO] + fileinfo_params, shell=True) diff --git a/scripts/retdec_signature_from_library_creator.py b/scripts/retdec_signature_from_library_creator.py new file mode 100644 index 000000000..8f9895b0c --- /dev/null +++ b/scripts/retdec_signature_from_library_creator.py @@ -0,0 +1,179 @@ +#! /usr/bin/env python3 + +"""Create Yara rules file from static libraries.""" + +import argparse +import shutil +import sys +import os +import subprocess +import tempfile +from pathlib import Path + +import retdec_config as config +from retdec_utils import Utils + + +def parse_args(): + parser = argparse.ArgumentParser(description=__doc__, + formatter_class=argparse.ArgumentDefaultsHelpFormatter) + + parser.add_argument('input', + nargs='+', + metavar='FILE', + help='Input file(s)') + + parser.add_argument('-n', '--no-cleanup', + dest='no_cleanup', + action='store_true', + help='Temporary .pat files will be kept.') + + parser.add_argument('-o', '--output', + dest='output', + metavar='FILE', + required=True, + help='Where result(s) will be stored.') + + parser.add_argument('-m', '--min-pure', + dest='min_pure', + default=16, + help='Minimum pure information needed for patterns.') + + parser.add_argument('-i', '--ignore-nops', + dest='ignore_nops', + help='Ignore trailing NOPs when computing (pure) size.') + + parser.add_argument('-l', '--logfile', + dest='logfile', + action='store_true', + help='Add log-file with \'.log\' suffix from pat2yara.') + + parser.add_argument('-b', '--bin2pat-only', + dest='bin_to_pat_only', + action='store_true', + help='Stop after bin2pat.') + + return parser.parse_args() + + +class SigFromLib: + def __init__(self, _args): + self.args = _args + self.ignore_nop = '' + self.file_path = '' + self.tmp_dir_path = '' + + def print_error_and_cleanup(self, message): + """Print error message and clean up temporary files. + """ + + # Cleanup. 
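+        # Remove the temporary directory used for the extracted objects and
+        # .pat files, unless --no-cleanup was given.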
+ if not self.args.no_cleanup: + if os.path.exists(self.tmp_dir_path): + Utils.remove_forced(self.tmp_dir_path) + + Utils.print_error_and_die(message + '.') + + def _check_arguments(self): + + for f in self.args.input: + if not os.path.isfile(f): + self.print_error_and_cleanup('input %s is not a valid file nor argument' % f) + return False + + self.file_path = self.args.output + dir_name = os.path.dirname(Utils.get_realpath(self.file_path)) + self.tmp_dir_path = os.path.join(dir_name, 'XXXXXXXXX') + + if self.args.ignore_nops: + self.ignore_nop = '--ignore-nops' + + return True + + def run(self): + if not self._check_arguments(): + return 1 + + pattern_files = [] + object_dirs = [] + + # Create .pat files for every library. + for lib_path in self.args.input: + # Check for invalid archives. + if not Utils.is_valid_archive(lib_path): + print('ignoring file %s - not valid archive' % lib_path) + continue + + # Get library name for .pat file. + lib_name = Path(lib_path).resolve().stem + + # Create sub-directory for object files. + object_dir = os.path.join(self.tmp_dir_path, lib_name) + '-objects' + object_dirs = [object_dir] + os.makedirs(object_dir) + + # Extract all files to temporary folder. + subprocess.call([config.AR, lib_path, '--extract', '--output', object_dir], shell=True) + + # List all extracted objects. + objects = [] + + for root, dirs, files in os.walk(object_dir): + for f in files: + fname = os.path.join(root, f) + if os.path.isfile(fname): + objects.append(fname) + + # Extract patterns from library. + pattern_file = os.path.join(self.tmp_dir_path, lib_name) + '.pat' + pattern_files = [pattern_file] + result = subprocess.call([config.BIN2PAT, '-o', pattern_file] + objects, shell=True) + + if result != 0: + self.print_error_and_cleanup('utility bin2pat failed when processing %s' % lib_path) + + # Remove extracted objects continuously. + if not self.args.no_cleanup: + if os.path.exists(object_dir): + shutil.rmtree(object_dir) + + # Skip second step - only .pat files will be created. + if self.args.bin_to_pat_only: + if not self.args.no_cleanup: + for d in object_dirs: + if os.path.exists(d): + shutil.rmtree(d) + # sys.exit(0) + return 1 + + # Create final .yara file from .pat files. + if self.args.logfile: + result = subprocess.call( + [config.PAT2YARA] + pattern_files + ['--min-pure', str(self.args.min_pure), '-o', self.file_path, '-l', + self.file_path + '.log', self.ignore_nop, + str(self.args.ignore_nops)], + shell=True) + + if result != 0: + self.print_error_and_cleanup('utility pat2yara failed') + else: + result = subprocess.call( + [config.PAT2YARA] + pattern_files + ['--min-pure', str(self.args.min_pure), '-o', self.file_path, + self.ignore_nop, str(self.args.ignore_nops)], shell=True) + + if result != 0: + self.print_error_and_cleanup('utility pat2yara failed') + + # Do cleanup. + if not self.args.no_cleanup: + Utils.remove_forced(self.tmp_dir_path) + + return result + + +if __name__ == '__main__': + args = parse_args() + + sig = SigFromLib(args) + + sys.exit(sig.run()) diff --git a/scripts/retdec_unpacker.py b/scripts/retdec_unpacker.py new file mode 100644 index 000000000..f362a7b39 --- /dev/null +++ b/scripts/retdec_unpacker.py @@ -0,0 +1,214 @@ +#! 
/usr/bin/env python3 + +""" +The script tries to unpack the given executable file by using any +of the supported unpackers, which are at present: + * generic unpacker + * upx + +Required argument: + * (packed) binary file + +Optional arguments: + * desired name of unpacked file + * use extended exit codes + +Returns: + 0 successfully unpacked +""" + +import argparse +import os +import shutil +import sys + +import retdec_config as config +from retdec_utils import CmdRunner +from retdec_utils import Utils + + +def parse_args(): + parser = argparse.ArgumentParser(description=__doc__, + formatter_class=argparse.ArgumentDefaultsHelpFormatter) + + parser.add_argument('-e', '--extended-exit-codes', + dest='extended_exit_codes', + action='store_true', + help='Use more granular exit codes than just 0/1.') + + parser.add_argument('-o', '--output', + dest='output', + metavar='FILE', + help='Output file (default: file-unpacked).') + + parser.add_argument('--max-memory', + dest='max_memory', + help='Limit the maximal memory of retdec-unpacker to N bytes.') + + parser.add_argument('--max-memory-half-ram', + dest='max_memory_half_ram', + action='store_true', + help='Limit the maximal memory of retdec-unpacker to half of system RAM.') + + parser.add_argument('input', + metavar='FILE', + help='The input file.') + + return parser.parse_args() + + +class Unpacker: + RET_UNPACK_OK = 0 + # 1 generic unpacker - nothing to do; upx succeeded (--extended-exit-codes only) + RET_UNPACKER_NOTHING_TO_DO_OTHERS_OK = 1 + # 2 not packed or unknown packer + RET_NOTHING_TO_DO = 2 + # 3 generic unpacker failed; upx succeeded (--extended-exit-codes only) + RET_UNPACKER_FAILED_OTHERS_OK = 3 + # 4 generic unpacker failed; upx not succeeded + RET_UNPACKER_FAILED = 4 + + UNPACKER_EXIT_CODE_OK = 0 + # 1 There was not found matching plugin. + UNPACKER_EXIT_CODE_NOTHING_TO_DO = 1 + # 2 At least one plugin failed at the unpacking of the file. + UNPACKER_EXIT_CODE_UNPACKING_FAILED = 2 + # 3 Error with preprocessing of input file before unpacking. + UNPACKER_EXIT_CODE_PREPROCESSING_ERROR = 3 + + def __init__(self, _args): + self.args = _args + self.input = '' + self.output = '' + + def _check_arguments(self): + """Check proper combination of input arguments. + """ + + # Check whether the input file was specified. + if self.args.input is None: + Utils.print_error_and_die('No input file was specified') + return False + + if not os.access(self.args.input, os.R_OK): + Utils.print_error_and_die('The input file %s does not exist or is not readable' % self.args.input) + return False + + # Conditional initialization. + if not self.args.output: + self.output = self.args.input + '-unpacked' + else: + self.output = self.args.output + + if self.args.max_memory is not None: + try: + max_memory = int(self.args.max_memory) + if max_memory > 0: + return True + except ValueError: + Utils.print_error_and_die( + 'Invalid value for --max-memory: %s (expected a positive integer)' % self.args.max_memory) + return False + + # Convert to absolute paths. + self.input = Utils.get_realpath(self.args.input) + self.output = Utils.get_realpath(self.output) + + return True + + def _unpack(self, output): + """Try to unpack the given file. 
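+        The generic retdec-unpacker is tried first; when it does not succeed
+        and we are not on Windows, UPX is tried as a fallback. Returns a pair
+        (output, return_code), where return_code is one of the RET_* values
+        defined above.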
+ """ + + unpacker_params = [self.input, '-o', output] + + if self.args.max_memory: + unpacker_params.extend(['--max-memory', self.args.max_memory]) + elif self.args.max_memory_half_ram: + unpacker_params.append('--max-memory-half-ram') + + print() + print('##### Trying to unpack ' + self.input + ' into ' + output + ' by using generic unpacker...') + print('RUN: ' + config.UNPACKER + ' '.join(unpacker_params)) + + cmd = CmdRunner() + out, unpacker_rc, _ = cmd.run_cmd([config.UNPACKER] + unpacker_params) + + if unpacker_rc == self.UNPACKER_EXIT_CODE_OK: + print('##### Unpacking by using generic unpacker: successfully unpacked') + return out, self.RET_UNPACK_OK + elif unpacker_rc == self.UNPACKER_EXIT_CODE_NOTHING_TO_DO: + print('##### Unpacking by using generic unpacker: nothing to do') + else: + # Do not return -> try the next unpacker + # UNPACKER_EXIT_CODE_UNPACKING_FAILED + # UNPACKER_EXIT_CODE_PREPROCESSING_ERROR + print('##### Unpacking by using generic unpacker: failed') + + if not Utils.is_windows(): + # Do not return -> try the next unpacker + # Try to unpack via UPX + print() + print('##### Trying to unpack ' + self.input + ' into ' + output + ' by using UPX...') + print('RUN: upx -d ' + self.input + ' -o ' + output) + + out, upx_rc, _ = cmd.run_cmd(['upx', '-d', self.input, '-o', output]) + + if upx_rc == 0: + print('##### Unpacking by using UPX: successfully unpacked') + if self.args.extended_exit_codes: + if unpacker_rc == self.UNPACKER_EXIT_CODE_NOTHING_TO_DO: + return out, self.RET_UNPACKER_NOTHING_TO_DO_OTHERS_OK + elif unpacker_rc >= self.UNPACKER_EXIT_CODE_UNPACKING_FAILED: + return out, self.RET_UNPACKER_FAILED_OTHERS_OK + else: + return out, self.RET_UNPACK_OK + else: + # We cannot distinguish whether upx failed or the input file was + # not upx-packed + print('##### Unpacking by using UPX: nothing to do') + else: + print('##### UPX not available on Windows') + + # Do not return -> try the next unpacker + # Return. + if unpacker_rc >= self.UNPACKER_EXIT_CODE_UNPACKING_FAILED: + return out, self.RET_UNPACKER_FAILED + else: + return out, self.RET_NOTHING_TO_DO + + def unpack_all(self): + # Check arguments and set default values for unset options. + if not self._check_arguments(): + return '', -1 + + res_rc = -1 + res_out = '' + tmp_output = self.output + '.tmp' + + while True: + output, return_code = self._unpack(tmp_output) + + if return_code == self.RET_UNPACK_OK or return_code == self.RET_UNPACKER_NOTHING_TO_DO_OTHERS_OK \ + or return_code == self.RET_UNPACKER_FAILED_OTHERS_OK: + res_rc = return_code + res_out += output + + shutil.move(tmp_output, self.output) + self.input = self.output + else: + # Remove the temporary file, just in case some of the unpackers crashed + # during unpacking and left it on the disk (e.g. upx). + if os.path.exists(tmp_output): + os.remove(tmp_output) + break + + return res_out, return_code if res_rc == -1 else res_rc + + +if __name__ == '__main__': + args = parse_args() + + unpacker = Unpacker(args) + _, rc = unpacker.unpack_all() + sys.exit(rc) diff --git a/scripts/retdec_utils.py b/scripts/retdec_utils.py new file mode 100644 index 000000000..cf7acabbe --- /dev/null +++ b/scripts/retdec_utils.py @@ -0,0 +1,393 @@ +#! /usr/bin/env python3 + +"""Compilation and decompilation utility functions. 
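+
+The module provides the CmdRunner class for running external commands (with an
+optional timeout) and the Utils class with file, archive and number helpers.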
+""" +import os +import pathlib +import re +import shutil +import signal +import subprocess +import sys + +import retdec_config as config + +"""Taken from https://github.com/avast-tl/retdec-regression-tests-framework/blob/master/regression_tests/cmd_runner.py +""" + + +class CmdRunner: + """A runner of external commands.""" + + def run_cmd(self, cmd, input=b'', timeout=None, input_encoding='utf-8', + output_encoding='utf-8', strip_shell_colors=True): + """Runs the given command (synchronously). + + :param list cmd: Command to be run as a list of arguments (strings). + :param bytes input: Input to be used when running the command. + :param int timeout: Number of seconds after which the command should be + terminated. + :param str input_encoding: Encode the command's output in this encoding. + :param str output_encoding: Decode the command's output in this encoding. + :param bool strip_shell_colors: Should shell colors be stripped from + the output? + + :returns: A triple (`output`, `return_code`, `timeouted`). + + The meaning of the items in the return value are: + + * `output` contains the combined output from the standard outputs and + standard error, + * `return_code` is the return code of the command, + * `timeouted` is either `True` or `False`, depending on whether the + command has timeouted. + + If `input` is a string (`str`), not `bytes`, it is decoded into `bytes` + by using `input_encoding`. + + If `output_encoding` is not ``None``, the returned data are decoded in + that encoding. Also, all line endings are converted to ``'\\n'``, and + if ``strip_shell_colors`` is ``True``, shell colors are stripped. + Otherwise, if `output_encoding` is ``None``, the data are directly + returned as raw bytes without any conversions. + + To disable the timeout, pass ``None`` as `timeout` (the default). + + If the timeout expires before the command finishes, the value of `output` + is the command's output generated up to the timeout. + """ + + def decode(output): + if output_encoding is not None: + output = output.decode(output_encoding, errors='replace') + output = re.sub(r'\r\n?', '\n', output) + if strip_shell_colors: + return re.sub(r'\x1b[^m]*m', '', output) + return output + + # The communicate() call below expects the input to be in bytes, so + # convert it unless it is already in bytes. + if not isinstance(input, bytes): + input = input.encode(input_encoding) + + try: + p = self.start(cmd) + output, _ = p.communicate(input, timeout) + return decode(output), p.returncode, False + except subprocess.TimeoutExpired: + # Kill the process, along with all its child processes. + p.kill() + # Finish the communication to obtain the output. + output, _ = p.communicate() + return decode(output), p.returncode, True + + def start(self, cmd, discard_output=False): + """Starts the given command and returns a handler to it. + + :param list cmd: Command to be run as a list of arguments (strings). + :param bool discard_output: Should the output be discarded instead of + being buffered so it can be obtained later? + + :returns: A handler to the started command (``subprocess.Popen``). + + If the output is irrelevant for you, you should set `discard_output` to + ``True``. + """ + # The implementation is platform-specific because we want to be able to + # kill the children alongside with the process. 
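+        # On Windows the command is wrapped in _WindowsProcess (killed via
+        # taskkill); elsewhere in _LinuxProcess (killed via its process group).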
+ kwargs = dict( + args=cmd, + stdin=subprocess.PIPE, + stdout=subprocess.DEVNULL if discard_output else subprocess.PIPE, + stderr=subprocess.DEVNULL if discard_output else subprocess.STDOUT + ) + if Utils.is_windows(): + return _WindowsProcess(**kwargs) + else: + return _LinuxProcess(**kwargs) + + +class _LinuxProcess(subprocess.Popen): + """An internal wrapper around ``subprocess.Popen`` for Linux.""" + + def __init__(self, **kwargs): + # To ensure that all the process' children terminate when the process + # is killed, we use a process group so as to enable sending a signal to + # all the processes in the group. For that, we attach a session ID to + # the parent process of the spawned child processes. This will make it + # the group leader of the processes. When a signal is sent to the + # process group leader, it's transmitted to all of the child processes + # of this group. + # + # os.setsid is passed in the argument preexec_fn so it's run after + # fork() and before exec(). + # + # This solution is based on http://stackoverflow.com/a/4791612. + kwargs['preexec_fn'] = os.setsid + super().__init__(**kwargs) + + def kill(self): + """Kills the process, including its children.""" + os.killpg(self.pid, signal.SIGTERM) + + +class _WindowsProcess(subprocess.Popen): + """An internal wrapper around ``subprocess.Popen`` for Windows.""" + + def __init__(self, **kwargs): + # Shell scripts need to be run with 'sh' on Windows. Simply running the + # script by its path doesn't work. That is, for example, instead of + # + # /path/to/retdec-decompiler.sh + # + # we need to run + # + # sh /path/to/retdec-decompiler.sh + # + if 'args' in kwargs and kwargs['args'] and kwargs['args'][0].endswith('.sh'): + kwargs['args'].insert(0, 'sh') + super().__init__(**kwargs) + + def kill(self): + """Kills the process, including its children.""" + # Since os.setsid() and os.killpg() are not available on Windows, we + # have to do this differently. More specifically, we do this by calling + # taskkill, which also kills the process' children. + # + # This solution is based on + # http://mackeblog.blogspot.cz/2012/05/killing-subprocesses-on-windows-in.html + cmd = ['taskkill', '/F', '/T', '/PID', str(self.pid)] + subprocess.call(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) + + +class Utils: + + @staticmethod + def remove_forced(path): + for n in os.listdir(path): + p = os.path.join(path, n) + if os.path.isdir(p): + shutil.rmtree(p) + else: + os.unlink(p) + + @staticmethod + def is_windows(): + return sys.platform in ('win32', 'msys') + + @staticmethod + def get_realpath(path): + """Prints the real, physical location of a directory or file, relative or + absolute. + 1 argument is needed + """ + return str(pathlib.Path(path).resolve()) + + @staticmethod + def print_error_and_die(error): + """Print error message to stderr and die. + 1 argument is needed + Returns - 1 if number of arguments is incorrect + """ + # if error is None: + # sys.exit(1) + + sys.stderr.write('Error: %s' % error) + # sys.exit(1) + + @staticmethod + def print_warning(warning): + """Print warning message to stderr. + """ + if warning is None: + return + + sys.stderr.write('Warning: %s' % warning) + + @staticmethod + def has_archive_signature(path): + """Check if file has any ar signature. 
+ 1 argument is needed - file path + Returns - 0 if file has ar signature + 1 if number of arguments is incorrect + 2 no signature + """ + if subprocess.call([config.AR, path, '--arch-magic'], shell=True): + return 0 + return 2 + + @staticmethod + def has_thin_archive_signature(path): + """Check if file has thin ar signature. + 1 argument is needed - file path + Returns - 0 if file has thin ar signature + 1 if number of arguments is incorrect + 2 no signature + """ + if subprocess.call([config.AR, path, '--thin-magic'], shell=True): + return 0 + return 2 + + @staticmethod + def is_valid_archive(path): + """Check if file is an archive we can work with. + 1 argument is needed - file path + Returns - 0 if file is valid archive + 1 if file is invalid archive + """ + # We use our own messages so throw original output away. + return subprocess.call([config.AR, path, '--valid'], shell=True, stderr=subprocess.STDOUT, + stdout=None) + + @staticmethod + def archive_object_count(path): + """Counts object files in archive. + 1 argument is needed - file path + Returns - 1 if error occurred + """ + return subprocess.call([config.AR, path, '--object-count'], shell=True) + + @staticmethod + def archive_list_content(path): + """Print content of archive. + 1 argument is needed - file path + Returns - 1 if number of arguments is incorrect + """ + return subprocess.call([config.AR, path, '--list', '--no-numbers'], shell=True) + + @staticmethod + def archive_list_numbered_content(path): + """Print numbered content of archive. + 1 argument is needed - file path + Returns - 1 if number of arguments is incorrect + """ + print('Index\tName') + return subprocess.call([config.AR, path, '--list'], shell=True) + + @staticmethod + def archive_list_numbered_content_json(path): + """Print numbered content of archive in JSON format. + 1 argument is needed - file path + Returns - 1 if number of arguments is incorrect + """ + return subprocess.call([config.AR, path, '--list', '--json'], shell=True) + + @staticmethod + def archive_get_by_name(path, name, output): + """Get a single file from archive by name. + 3 arguments are needed - path to the archive + - name of the file + - output path + Returns - 1 if number of arguments is incorrect + - 2 if error occurred + """ + if not subprocess.call([config.AR, path, '--name', name, '--output', output], + shell=True, stderr=subprocess.STDOUT, stdout=None): + return 2 + + return 1 + + @staticmethod + def archive_get_by_index(archive, index, output): + """Get a single file from archive by index. + 3 arguments are needed - path to the archive + - index of the file + - output path + Returns - 1 if number of arguments is incorrect + - 2 if error occurred + """ + if not subprocess.call([config.AR, archive, '--index', index, '--output', output], + shell=True, stderr=subprocess.STDOUT, stdout=None): + return 2 + + @staticmethod + def is_macho_archive(path): + """Check if file is Mach-O universal binary with archives. + 1 argument is needed - file path + Returns - 0 if file is archive + 1 if file is not archive + """ + return subprocess.call([config.EXTRACT, '--check-archive', path], shell=True, + stderr=subprocess.STDOUT, stdout=None) + + @staticmethod + def is_decimal_number(num): + """Check string is a valid decimal number. + 1 argument is needed - string to check. + Returns - 0 if string is a valid decimal number. 
+ 1 otherwise + """ + regex = '^[0-9]+$' + if re.search(regex, str(num)): + return True + else: + return False + + @staticmethod + def is_hexadecimal_number(num): + """Check string is a valid hexadecimal number. + 1 argument is needed - string to check. + Returns - 0 if string is a valid hexadecimal number. + 1 otherwise + """ + regex = '^0x[0-9a-fA-F]+$' + if re.search(regex, str(num)): + return True + else: + return False + + @staticmethod + def is_number(num): + """Check string is a valid number (decimal or hexadecimal). + 1 argument is needed - string to check. + Returns - 0 if string is a valid number. + 1 otherwise + """ + if Utils.is_decimal_number(num): + return True + + if Utils.is_hexadecimal_number(num): + return True + + return False + + @staticmethod + def is_decimal_range(num): + """Check string is a valid decimal range. + 1 argument is needed - string to check. + Returns - 0 if string is a valid decimal range. + 1 otherwise + """ + regex = '^[0-9]+-[0-9]+$' + if re.search(regex, str(num)): + return True + else: + return False + + @staticmethod + def is_hexadecimal_range(num): + """Check string is a valid hexadecimal range + 1 argument is needed - string to check. + Returns - 0 if string is a valid hexadecimal range + 1 otherwise + """ + regex = '^0x[0-9a-fA-F]+-0x[0-9a-fA-F]+$' + if re.search(regex, str(num)): + return True + else: + return False + + @staticmethod + def is_range(num): + """Check string is a valid range (decimal or hexadecimal). + 1 argument is needed - string to check. + Returns - 0 if string is a valid range + 1 otherwise + """ + if Utils.is_decimal_range(num): + return True + + if Utils.is_hexadecimal_range(num): + return True + + return False From 97ae64161acfff2f2a7f6d4d69c70fd75b96f09b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20K=C3=B6sel?= Date: Tue, 12 Jun 2018 19:14:08 +0200 Subject: [PATCH 02/48] Port cmake/install-share to python --- cmake/install-share.py | 120 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 120 insertions(+) create mode 100644 cmake/install-share.py diff --git a/cmake/install-share.py b/cmake/install-share.py new file mode 100644 index 000000000..30697c2b8 --- /dev/null +++ b/cmake/install-share.py @@ -0,0 +1,120 @@ +#! /usr/bin/env python3 + +""" +Get RetDec share directory. +""" +import hashlib +import os +import shutil +import sys +from tarfile import TarFile, TarError +from urllib import request, error + +# Check arguments. +import urllib + +if len(sys.argv) != 2: + print('ERROR: Unexpected number of arguments.') + sys.exit(1) + +############################################################################### + +version_filename = 'version.txt' +arch_suffix = 'tar.xz' + +sha256hash_ref = 'b54ba07e2f28143c9afe34a9d5b4114fb61f3c1175b9807caced471fec82001e' +version = '2018-02-08' + +############################################################################### + +arch_name = 'retdec-support' + '_' + version + '.' + arch_suffix + +# Get install path from script options. +install_path = sys.argv[1] + +share_dir = os.path.join(install_path, 'share') +share_retdec_dir = os.path.join(share_dir, 'retdec') +support_dir = os.path.join(share_retdec_dir, 'support') + +arch_path = os.path.join(support_dir, arch_name) + + +############################################################################### + +def cleanup(): + if os.path.exists(support_dir): + for n in os.listdir(support_dir): + p = os.path.join(support_dir, n) + if os.path.isdir(p): + shutil.rmtree(p) + else: + os.unlink(p) + + +# Share directory exists. 
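+# If it already contains a version file with the expected version, keep it as
+# is; otherwise remove its contents and download the archive again.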
+if os.path.exists(support_dir): + # Version file exists. + if os.path.isfile(os.path.join(support_dir, version_filename)): + with open(os.path.join(support_dir, version_filename)) as version_file: + version_from_file = version_file.read() + + if version == version_from_file: + print('%s already exists, version is ok' % support_dir) + sys.exit(0) + else: + print('versions is not as expected -> replace with expected version') + + cleanup() + +# Make sure destination directory exists. +os.mkdir(support_dir) + +# Get archive using wget. +arch_url = 'https://github.com/avast-tl/retdec-support/releases/download/%s/%s' % (version, arch_name) +print('Downloading archive from %s ...' % arch_url) + +try: + response = urllib.request.urlopen(arch_url, timeout=10) + content = response.read() + with open(arch_path, 'w') as f: + f.write(content) +except (error.HTTPError, error.URLError): + print('ERROR: download failed') + cleanup() + sys.exit(1) + +# Compute hash of the downloaded archive. +print('Verfifying archive\'s checksum ...') + +sha256 = hashlib.sha256() +with open(arch_path, 'rb') as f: + try: + sha256.update(f.read()) + except IOError: + print('ERROR: failed to compute the SHA-256 hash of the archive') + cleanup() + sys.exit(1) + +sha256hash = sha256.hexdigest() + +# Check that hash is ok. +if sha256hash != sha256hash_ref: + print('ERROR: downloaded archive is invalid (SHA-256 hash check failed)') + cleanup() + sys.exit(1) + +# Unpack archive. +print('Unpacking archive ...') +with TarFile(arch_path) as tar: + try: + tar.extractall(support_dir) + except TarError: + print('ERROR: failed to unpack the archive') + cleanup() + sys.exit(1) + +# Remove archive. +os.remove(arch_path) + +print('RetDec support directory downloaded OK') +sys.exit(0) From d922d13a38a9e68fb980cefe01b08a05ebe96a0b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20K=C3=B6sel?= Date: Tue, 12 Jun 2018 19:14:46 +0200 Subject: [PATCH 03/48] Some cleanup --- scripts/retdec_archive_decompiler.py | 18 +++++++++--------- scripts/retdec_fileinfo.py | 8 ++++---- .../retdec_signature_from_library_creator.py | 6 ++---- scripts/retdec_unpacker.py | 14 +++++++------- scripts/retdec_utils.py | 17 +++++++++-------- 5 files changed, 31 insertions(+), 32 deletions(-) diff --git a/scripts/retdec_archive_decompiler.py b/scripts/retdec_archive_decompiler.py index 0beeba1e7..4e6f940cf 100644 --- a/scripts/retdec_archive_decompiler.py +++ b/scripts/retdec_archive_decompiler.py @@ -19,6 +19,10 @@ def parse_args(): ' double-dash -- argument.', formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser.add_argument("file", + metavar='FILE', + help='File to analyze.') + parser.add_argument("--plain", dest="plain_format", help="print list of files in plain text") @@ -36,9 +40,6 @@ def parse_args(): dest="arg_list", help="args passed to the decompiler") - parser.add_argument("file", - help="path") - return parser.parse_args() @@ -67,14 +68,13 @@ def _print_error_plain_or_json(self, error): # exit(1) else: # Otherwise print in plain text. - Utils.print_error_and_die(error) + Utils.print_error(error) def _cleanup(self): """Cleans up all temporary files. No arguments accepted. 
""" - if os.path.exists(self.tmp_archive): - Utils.remove_forced(self.tmp_archive) + Utils.remove_forced(self.tmp_archive) def _check_arguments(self): @@ -83,7 +83,7 @@ def _check_arguments(self): if self.args.plain_format: if self.use_json_format: - Utils.print_error_and_die('Arguments --plain and --json are mutually exclusive.') + Utils.print_error('Arguments --plain and --json are mutually exclusive.') return False else: self.enable_list_mode = True @@ -91,7 +91,7 @@ def _check_arguments(self): if self.args.json_format: if self.args.args.plain_format: - Utils.print_error_and_die('Arguments --plain and --json are mutually exclusive.') + Utils.print_error('Arguments --plain and --json are mutually exclusive.') return False else: self.enable_list_mode = True @@ -102,7 +102,7 @@ def _check_arguments(self): if self.args.file: if not (os.path.isfile(self.args.file)): - Utils.print_error_and_die('Input %s is not a valid file.' % self.args.file) + Utils.print_error('Input %s is not a valid file.' % self.args.file) return False self.library_path = self.args.file diff --git a/scripts/retdec_fileinfo.py b/scripts/retdec_fileinfo.py index 754b37de5..d8a91d946 100644 --- a/scripts/retdec_fileinfo.py +++ b/scripts/retdec_fileinfo.py @@ -24,6 +24,10 @@ def parse_args(): formatter_class=argparse.RawDescriptionHelpFormatter ) + parser.add_argument('file', + metavar='FILE', + help='File to analyze.') + parser.add_argument('-j', '--json', dest='json', action='store_true', @@ -34,10 +38,6 @@ def parse_args(): action='store_true', help='Should use external patterns') - parser.add_argument('file', - metavar='FILE', - help='File to analyze.') - return parser.parse_args() diff --git a/scripts/retdec_signature_from_library_creator.py b/scripts/retdec_signature_from_library_creator.py index 8f9895b0c..89fea8b59 100644 --- a/scripts/retdec_signature_from_library_creator.py +++ b/scripts/retdec_signature_from_library_creator.py @@ -69,10 +69,9 @@ def print_error_and_cleanup(self, message): # Cleanup. if not self.args.no_cleanup: - if os.path.exists(self.tmp_dir_path): - Utils.remove_forced(self.tmp_dir_path) + Utils.remove_forced(self.tmp_dir_path) - Utils.print_error_and_die(message + '.') + Utils.print_error(message + '.') def _check_arguments(self): @@ -175,5 +174,4 @@ def run(self): args = parse_args() sig = SigFromLib(args) - sys.exit(sig.run()) diff --git a/scripts/retdec_unpacker.py b/scripts/retdec_unpacker.py index f362a7b39..b7b4778ee 100644 --- a/scripts/retdec_unpacker.py +++ b/scripts/retdec_unpacker.py @@ -31,6 +31,10 @@ def parse_args(): parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser.add_argument('input', + metavar='FILE', + help='The input file.') + parser.add_argument('-e', '--extended-exit-codes', dest='extended_exit_codes', action='store_true', @@ -50,10 +54,6 @@ def parse_args(): action='store_true', help='Limit the maximal memory of retdec-unpacker to half of system RAM.') - parser.add_argument('input', - metavar='FILE', - help='The input file.') - return parser.parse_args() @@ -87,11 +87,11 @@ def _check_arguments(self): # Check whether the input file was specified. 
if self.args.input is None: - Utils.print_error_and_die('No input file was specified') + Utils.print_error('No input file was specified') return False if not os.access(self.args.input, os.R_OK): - Utils.print_error_and_die('The input file %s does not exist or is not readable' % self.args.input) + Utils.print_error('The input file %s does not exist or is not readable' % self.args.input) return False # Conditional initialization. @@ -106,7 +106,7 @@ def _check_arguments(self): if max_memory > 0: return True except ValueError: - Utils.print_error_and_die( + Utils.print_error( 'Invalid value for --max-memory: %s (expected a positive integer)' % self.args.max_memory) return False diff --git a/scripts/retdec_utils.py b/scripts/retdec_utils.py index cf7acabbe..1208d0d94 100644 --- a/scripts/retdec_utils.py +++ b/scripts/retdec_utils.py @@ -164,12 +164,13 @@ class Utils: @staticmethod def remove_forced(path): - for n in os.listdir(path): - p = os.path.join(path, n) - if os.path.isdir(p): - shutil.rmtree(p) - else: - os.unlink(p) + if os.path.exists(path): + for n in os.listdir(path): + p = os.path.join(path, n) + if os.path.isdir(p): + shutil.rmtree(p) + else: + os.unlink(p) @staticmethod def is_windows(): @@ -184,7 +185,7 @@ def get_realpath(path): return str(pathlib.Path(path).resolve()) @staticmethod - def print_error_and_die(error): + def print_error(error): """Print error message to stderr and die. 1 argument is needed Returns - 1 if number of arguments is incorrect @@ -192,7 +193,7 @@ def print_error_and_die(error): # if error is None: # sys.exit(1) - sys.stderr.write('Error: %s' % error) + print('Error: %s' % error, file=sys.stdout) # sys.exit(1) @staticmethod From fdb6b4aa9effb73a7236497eb5ea519a6b11eeaa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20K=C3=B6sel?= Date: Tue, 12 Jun 2018 20:55:20 +0200 Subject: [PATCH 04/48] Replaced install-share in cmake file with fully working python version --- cmake/install-external.cmake | 2 +- cmake/install-share.py | 18 +++++++----------- 2 files changed, 8 insertions(+), 12 deletions(-) diff --git a/cmake/install-external.cmake b/cmake/install-external.cmake index fa77c2ce7..160acf4eb 100644 --- a/cmake/install-external.cmake +++ b/cmake/install-external.cmake @@ -1,6 +1,6 @@ install(CODE " execute_process( - COMMAND sh \"${CMAKE_SOURCE_DIR}/cmake/install-share.sh\" \"${CMAKE_INSTALL_PREFIX}\" + COMMAND python \"${CMAKE_SOURCE_DIR}/cmake/install-share.py\" \"${CMAKE_INSTALL_PREFIX}\" RESULT_VARIABLE INSTALL_SHARE_RES ) if(INSTALL_SHARE_RES) diff --git a/cmake/install-share.py b/cmake/install-share.py index 30697c2b8..3df9b275a 100644 --- a/cmake/install-share.py +++ b/cmake/install-share.py @@ -7,12 +7,11 @@ import os import shutil import sys -from tarfile import TarFile, TarError +import tarfile +import urllib from urllib import request, error # Check arguments. -import urllib - if len(sys.argv) != 2: print('ERROR: Unexpected number of arguments.') sys.exit(1) @@ -56,7 +55,7 @@ def cleanup(): # Version file exists. if os.path.isfile(os.path.join(support_dir, version_filename)): with open(os.path.join(support_dir, version_filename)) as version_file: - version_from_file = version_file.read() + version_from_file = version_file.read().split('\n')[0] if version == version_from_file: print('%s already exists, version is ok' % support_dir) @@ -67,17 +66,14 @@ def cleanup(): cleanup() # Make sure destination directory exists. -os.mkdir(support_dir) +os.makedirs(support_dir, exist_ok=True) # Get archive using wget. 
arch_url = 'https://github.com/avast-tl/retdec-support/releases/download/%s/%s' % (version, arch_name) print('Downloading archive from %s ...' % arch_url) try: - response = urllib.request.urlopen(arch_url, timeout=10) - content = response.read() - with open(arch_path, 'w') as f: - f.write(content) + urllib.request.urlretrieve(arch_url, arch_path) except (error.HTTPError, error.URLError): print('ERROR: download failed') cleanup() @@ -105,10 +101,10 @@ def cleanup(): # Unpack archive. print('Unpacking archive ...') -with TarFile(arch_path) as tar: +with tarfile.open(arch_path) as tar: try: tar.extractall(support_dir) - except TarError: + except tarfile.ExtractError: print('ERROR: failed to unpack the archive') cleanup() sys.exit(1) From 8849cdea6cf15ceecdc55809151cd19bff45e5ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20K=C3=B6sel?= Date: Tue, 12 Jun 2018 21:24:04 +0200 Subject: [PATCH 05/48] Attemp to fix compile error --- cmake/install-share.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/cmake/install-share.py b/cmake/install-share.py index 3df9b275a..37deef79a 100644 --- a/cmake/install-share.py +++ b/cmake/install-share.py @@ -8,8 +8,7 @@ import shutil import sys import tarfile -import urllib -from urllib import request, error +import urllib.request # Check arguments. if len(sys.argv) != 2: @@ -68,13 +67,13 @@ def cleanup(): # Make sure destination directory exists. os.makedirs(support_dir, exist_ok=True) -# Get archive using wget. +# Download archive arch_url = 'https://github.com/avast-tl/retdec-support/releases/download/%s/%s' % (version, arch_name) print('Downloading archive from %s ...' % arch_url) try: urllib.request.urlretrieve(arch_url, arch_path) -except (error.HTTPError, error.URLError): +except (urllib.request.HTTPError, urllib.request.URLError): print('ERROR: download failed') cleanup() sys.exit(1) From ed45adcc34efb9baf1cf6552beebb5ade9b5f4b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20K=C3=B6sel?= Date: Tue, 12 Jun 2018 21:42:26 +0200 Subject: [PATCH 06/48] Fix compile for older python version --- cmake/install-share.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cmake/install-share.py b/cmake/install-share.py index 37deef79a..1b7d1ca95 100644 --- a/cmake/install-share.py +++ b/cmake/install-share.py @@ -8,7 +8,7 @@ import shutil import sys import tarfile -import urllib.request +import urllib # Check arguments. if len(sys.argv) != 2: @@ -72,8 +72,8 @@ def cleanup(): print('Downloading archive from %s ...' 
% arch_url) try: - urllib.request.urlretrieve(arch_url, arch_path) -except (urllib.request.HTTPError, urllib.request.URLError): + urllib.urlretrieve(arch_url, arch_path) +except IOError: print('ERROR: download failed') cleanup() sys.exit(1) From 97c456b07d619da9add22931089e83808189d7c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20K=C3=B6sel?= Date: Tue, 12 Jun 2018 21:56:00 +0200 Subject: [PATCH 07/48] Try calling python 3 instead --- cmake/install-external.cmake | 2 +- cmake/install-share.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cmake/install-external.cmake b/cmake/install-external.cmake index 160acf4eb..83061f29a 100644 --- a/cmake/install-external.cmake +++ b/cmake/install-external.cmake @@ -1,6 +1,6 @@ install(CODE " execute_process( - COMMAND python \"${CMAKE_SOURCE_DIR}/cmake/install-share.py\" \"${CMAKE_INSTALL_PREFIX}\" + COMMAND python3 \"${CMAKE_SOURCE_DIR}/cmake/install-share.py\" \"${CMAKE_INSTALL_PREFIX}\" RESULT_VARIABLE INSTALL_SHARE_RES ) if(INSTALL_SHARE_RES) diff --git a/cmake/install-share.py b/cmake/install-share.py index 1b7d1ca95..37deef79a 100644 --- a/cmake/install-share.py +++ b/cmake/install-share.py @@ -8,7 +8,7 @@ import shutil import sys import tarfile -import urllib +import urllib.request # Check arguments. if len(sys.argv) != 2: @@ -72,8 +72,8 @@ def cleanup(): print('Downloading archive from %s ...' % arch_url) try: - urllib.urlretrieve(arch_url, arch_path) -except IOError: + urllib.request.urlretrieve(arch_url, arch_path) +except (urllib.request.HTTPError, urllib.request.URLError): print('ERROR: download failed') cleanup() sys.exit(1) From 55c73dc4f385a6e05868f46e794a6908b768b8cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20K=C3=B6sel?= Date: Thu, 14 Jun 2018 15:04:11 +0200 Subject: [PATCH 08/48] Make install-share.py executable Compilable decompiler version --- cmake/install-share.py | 2 +- scripts/retdec_decompiler.py | 1878 ++++++++++++++++++++++++++++++++++ 2 files changed, 1879 insertions(+), 1 deletion(-) mode change 100644 => 100755 cmake/install-share.py create mode 100644 scripts/retdec_decompiler.py diff --git a/cmake/install-share.py b/cmake/install-share.py old mode 100644 new mode 100755 index 37deef79a..ee0c5c5a0 --- a/cmake/install-share.py +++ b/cmake/install-share.py @@ -1,4 +1,4 @@ -#! /usr/bin/env python3 +#!/usr/bin/env python3 """ Get RetDec share directory. diff --git a/scripts/retdec_decompiler.py b/scripts/retdec_decompiler.py new file mode 100644 index 000000000..c0804f047 --- /dev/null +++ b/scripts/retdec_decompiler.py @@ -0,0 +1,1878 @@ +#! /usr/bin/env python3 + +"""Decompiles the given file into the selected target high-level language.""" + +import argparse +import glob +import hashlib +import os +import re +import shutil +import subprocess +import sys +import threading +import time +from datetime import date +from pathlib import Path + +import retdec_config as config +from retdec_utils import Utils, CmdRunner +from retdec_signature_from_library_creator import SigFromLib +from retdec_unpacker import Unpacker + + +def parse_args(): + parser = argparse.ArgumentParser(description=__doc__, + formatter_class=argparse.ArgumentDefaultsHelpFormatter) + + parser.add_argument('input', + metavar='FILE', + help='File to decompile.') + + parser.add_argument('-a', '--arch', + dest='arch', + metavar='ARCH', + choices=['mips', 'pic32', 'arm', 'thumb', 'powerpc', 'x86'], + help='Specify target architecture [mips|pic32|arm|thumb|powerpc|x86].' 
+ ' Required if it cannot be autodetected from the input (e.g. raw mode, Intel HEX).') + + parser.add_argument('-e', '--endian', + dest='endian', + metavar='ENDIAN', + choices=['little', 'big'], + help='Specify target endianness [little|big].' + ' Required if it cannot be autodetected from the input (e.g. raw mode, Intel HEX).') + + parser.add_argument('-k', '--keep-unreachable-funcs', + dest='keep_unreachable_funcs', + action='store_true', + help='Keep functions that are unreachable from the main function.') + + parser.add_argument('-l', '--target-language', + dest='hll', + default='c', + metavar='LANGUAGE', + choices=['c', 'py'], + help='Target high-level language [c|py].') + + parser.add_argument('-m', '--mode', + dest='mode', + metavar='MODE', + choices=['bin', 'll', 'raw'], + help='Force the type of decompilation mode [bin|ll|raw]' + '(default: ll if input\'s suffix is \'.ll\', bin otherwise).') + + parser.add_argument('-o', '--output', + dest='output', + metavar='FILE', + default='file.ext', + help='Output file.') + + parser.add_argument('-p', '--pdb', + dest='pdb', + metavar='FILE', + help='File with PDB debug information.') + + parser.add_argument('--generate-log', + dest='generate_log', + help='Generate log') + + parser.add_argument('--ar-index', + dest='ar_index', + metavar='INDEX', + help='Pick file from archive for decompilation by its zero-based index.') + + parser.add_argument('--ar-name', + dest='ar_name', + metavar='NAME', + help='Pick file from archive for decompilation by its name.') + + parser.add_argument('--backend-aggressive-opts', + dest='backend_aggressive_opts', + help='Enables aggressive optimizations.') + + parser.add_argument('--backend-arithm-expr-evaluator', + dest='backend_arithm_expr_evaluator', + default='c', + help='Name of the used evaluator of arithmetical expressions .') + + parser.add_argument('--backend-call-info-obtainer', + dest='backend_call_info_obtainer', + default='optim', + help='Name of the obtainer of information about function calls.') + + parser.add_argument('--backend-cfg-test', + dest='backend_cfg_test', + help='Unifies the labels of all nodes in the emitted CFG (this has to be used in tests).') + + parser.add_argument('--backend-disabled-opts', + dest='backend_disabled_opts', + help='Prevents the optimizations from the given' + ' comma-separated list of optimizations to be run.') + + parser.add_argument('--backend-emit-cfg', + dest='backend_emit_cfg', + help='Emits a CFG for each function in the backend IR (in the .dot format).') + + parser.add_argument('--backend-emit-cg', + dest='backend_emit_cg', + help='Emits a CG for the decompiled module in the backend IR (in the .dot format).') + + parser.add_argument('--backend-cg-conversion', + dest='backend_cg_conversion', + default='auto', + choices=['auto', 'manual'], + help='Should the CG from the backend be converted automatically into the desired format? 
' + '[auto|manual].') + + parser.add_argument('--backend-cfg-conversion', + dest='backend_cfg_conversion', + default='auto', + help='Should CFGs from the backend be converted automatically into the desired format?') + + parser.add_argument('--backend-enabled-opts', + dest='backend_enabled_opts', + help='Runs only the optimizations from the given comma-separated list of optimizations.') + + parser.add_argument('--backend-find-patterns', + dest='backend_find_patterns', + help='Runs the finders of patterns specified in the given comma-separated list ' + '(use \'all\' to run them all).') + + parser.add_argument('--backend-force-module-name', + dest='backend_force_module_name', + help='Overwrites the module name that was detected/generated by the front-end.') + + parser.add_argument('--backend-keep-all-brackets', + dest='backend_keep_all_brackets', + help='Keeps all brackets in the generated code.') + + parser.add_argument('--backend-keep-library-funcs', + dest='backend_keep_library_funcs', + help='Keep functions from standard libraries.') + + parser.add_argument('--backend-llvmir2bir-converter', + dest='backend_llvmir2bir_converter', + default='orig', + help='Name of the converter from LLVM IR to BIR.') + + parser.add_argument('--backend-no-compound-operators', + dest='backend_no_compound_operators', + help='Do not emit compound operators (like +=) instead of assignments.') + + parser.add_argument('--backend-no-debug', + dest='backend_no_debug', + help='Disables the emission of debug messages, such as phases.') + + parser.add_argument('--backend-no-debug-comments', + dest='backend_no_debug_comments', + help='Disables the emission of debug comments in the generated code.') + + parser.add_argument('--backend-no-opts', + dest='backend_no_opts', + help='Disables backend optimizations.') + + parser.add_argument('--backend-no-symbolic-names', + dest='backend_no_symbolic_names', + help='Disables the conversion of constant arguments to their symbolic names.') + + parser.add_argument('--backend-no-time-varying-info', + dest='backend_no_time_varying_info', + help='Do not emit time-varying information, like dates.') + + parser.add_argument('--backend-no-var-renaming', + dest='backend_no_var_renaming', + help='Disables renaming of variables in the backend.') + + parser.add_argument('--backend-semantics', + dest='backend_semantics', + help='A comma-separated list of the used semantics.') + + parser.add_argument('--backend-strict-fpu-semantics', + dest='backend_strict_fpu_semantics', + help='Disables backend optimizations.') + + parser.add_argument('--backend-var-renamer', + dest='backend_var_renamer', + default='readable', + metavar='STYLE', + choices=['address', 'hungarian', 'readable', 'simple', 'unified'], + help='Used renamer of variables [address|hungarian|readable|simple|unified]') + + parser.add_argument('--cleanup', + dest='self.cleanup', + help='Removes temporary files created during the decompilation.') + + parser.add_argument('--color-for-ida', + dest='color_for_ida ', + help='Put IDA Pro color tags to output C file.') + + parser.add_argument('--config', + dest='config', + help='Specify JSON decompilation configuration file.') + + parser.add_argument('--no-config', + dest='no_config', + help='State explicitly that config file is not to be used.') + + parser.add_argument('--fileinfo-verbose', + dest='fileinfo_verbose', + help='Print all detected information about input file.') + + parser.add_argument('--fileinfo-use-all-external-patterns', + dest='fileinfo_use_all_external_patterns', + help='Use 
all detection rules from external YARA databases.') + + parser.add_argument('--graph-format', + dest='graph_format', + default='png', + metavar='FORMAT', + choices=['pdf', 'png', 'svg'], + help='Specify format of a all generated graphs (e.g. CG, CFG) [pdf|png|svg].') + + parser.add_argument('--raw-entry-point', + dest='raw_entry_point', + metavar='ADDRESS', + help='Entry point address used for raw binary (default: architecture dependent)') + + parser.add_argument('--raw-section-vma', + dest='raw_section_vma', + metavar='ADDRESS', + help='Virtual address where section created from the raw binary will be placed') + + parser.add_argument('--select-decode-only', + dest='select_decode_only', + help='Decode only selected parts (functions/ranges). Faster decompilation, but worse results.') + + parser.add_argument('--select-functions', + dest='select_functions', + metavar='FUNCS', + help='Specify a comma separated list of functions to decompile (example: fnc1,fnc2,fnc3).') + + parser.add_argument('--select-ranges', + dest='select_ranges', + metavar='RANGES', + help='Specify a comma separated list of ranges to decompile ' + '(example: 0x100-0x200,0x300-0x400,0x500-0x600).') + + parser.add_argument('--stop-after', + dest='stop_after', + help='Stop the decompilation after the given tool ' + '(supported tools: fileinfo, unpacker, bin2llvmir, llvmir2hll).') + + parser.add_argument('--static-code-sigfile', + dest='static_code_sigfile', + help='Adds additional signature file for static code detection.') + + parser.add_argument('--static-code-archive', + dest='static_code_archive', + help='Adds additional signature file for static code detection from given archive.') + + parser.add_argument('--no-default-static-signatures', + dest='no_default_static_signatures', + help='No default signatures for statically linked code analysis are loaded ' + '(options static-code-sigfile/archive are still available).') + + parser.add_argument('--max-memory', + dest='max_memory', + help='Limits the maximal memory of fileinfo, unpacker, bin2llvmir, ' + 'and llvmir2hll into the given number of bytes.') + + parser.add_argument('--no-memory-limit', + dest='no_memory_limit', + help='Disables the default memory limit (half of system RAM) of fileinfo, ' + 'unpacker, bin2llvmir, and llvmir2hll.') + + return parser.parse_args() + + +class Decompiler: + def __init__(self, _args): + self.args = _args + self.timeout = 300 + self.input = '' + self.output = '' + self.config = '' + + self.out_unpacked = '' + self.out_frontend_ll = '' + self.out_frontend_bc = '' + self.out_backend_bc = '' + self.out_backend_ll = '' + self.out_restored = '' + self.out_archive = '' + self.signatures_to_remove = [] + self.tool_log_file = '' + + self.TOOL_LOG_FILE = '' + + def check_arguments(self): + """Check proper combination of input arguments. + """ + + global PICKED_FILE + + # Check whether the input file was specified. + if not self.args.input: + Utils.print_error('No input file was specified') + + # Try to detect desired decompilation mode if not set by user. + # We cannot detect 'raw' mode because it overlaps with 'bin' (at least not based on extension). + if not self.args.mode: + if Path(self.args.input).suffix == 'll': + # Suffix .ll + self.args.mode = 'll' + else: + self.args.mode = 'bin' + + # Print warning message about unsupported combinations of options. 
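Note on the suffix-based mode detection above: pathlib's Path.suffix returns the extension with its leading dot ('.ll', not 'll'), so the comparison against 'll' never matches. A minimal sketch of the intended check (illustrative, not part of the patch):

# Editorial sketch: autodetect the decompilation mode from the input suffix.
# Path.suffix keeps the leading dot, so the comparison must use '.ll'.
from pathlib import Path

def detect_mode(input_path, forced_mode=None):
    if forced_mode:
        return forced_mode
    return 'll' if Path(input_path).suffix == '.ll' else 'bin'

# e.g. detect_mode('sample.ll') -> 'll', detect_mode('sample.exe') -> 'bin'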
+ if self.args.mode == 'll': + if self.args.arch: + Utils.print_warning('Option -a|--arch is not used in mode ' + self.args.mode) + + if self.args.pdb: + Utils.print_warning('Option -p|--pdb is not used in mode ' + self.args.mode) + + if not self.args.config or not self.args.no_config: + Utils.print_error('Option --config or --no-config must be specified in mode ' + self.args.mode) + elif self.args.mode == 'raw': + # Errors -- missing critical arguments. + if not self.args.arch: + Utils.print_error('Option -a|--arch must be used with mode ' + self.args.mode) + + if not self.args.endian: + Utils.print_error('Option -e|--endian must be used with mode ' + self.args.mode) + + if not self.args.raw_entry_point: + Utils.print_error('Option --raw-entry-point must be used with mode ' + self.args.mode) + + if not self.args.raw_section_vma: + Utils.print_error('Option --raw-section-vma must be used with mode ' + self.args.mode) + + if not Utils.is_number(self.args.raw_entry_point): + Utils.print_error( + 'Value in option --raw-entry-point must be decimal (e.g. 123) or hexadecimal value (e.g. 0x123)') + if not Utils.is_number(self.args.raw_section_vma): + Utils.print_error( + 'Value in option --raw-section-vma must be decimal (e.g. 123) or hexadecimal value (e.g. 0x123)') + + # Archive decompilation errors. + if self.args.ar_name and self.args.ar_index: + Utils.print_error('Options --ar-name and --ar-index are mutually exclusive. Pick one.') + + if self.args.mode != 'bin': + if self.args.ar_name: + Utils.print_warning('Option --ar-name is not used in mode ' + self.args.mode) + + if self.args.ar_index: + Utils.print_warning('Option --ar-index is not used in mode ' + self.args.mode) + + if not self.args.output: + # No output file was given, so use the default one. + (iname, ext) = os.path.splitext(self.input) + + if ext == 'll': + # Suffix .ll + self.output = iname + '.' + self.args.hll + elif ext == 'exe': + # Suffix .exe + self.output = iname + '.' + self.args.hll + elif ext == 'elf': + # Suffix .elf + self.output = iname + '.' + self.args.hll + elif ext == 'ihex': + # Suffix .ihex + self.output = iname + '.' + self.args.hll + elif ext == 'macho': + # Suffix .macho + self.output = iname + '.' + self.args.hll + else: + self.output = self.output + PICKED_FILE + '.' + self.args.hll + + # If the output file name matches the input file name, we have to change the + # output file name. Otherwise, the input file gets overwritten. + if self.input == self.output: + self.output = iname + '.out.' + self.args.hll + + # Convert to absolute paths. + self.input = Utils.get_realpath(self.args.input) + self.output = Utils.get_realpath(self.args.output) + + if os.path.exists(self.args.pdb): + self.args.pdb = Utils.get_realpath(self.args.pdb) + + # Check that selected ranges are valid. + if self.args.selected_ranges: + for r in self.args.selected_ranges: + # Check if valid range. + if not Utils.is_range(r): + Utils.print_error( + 'Range %s in option --select-ranges is not a valid decimal (e.g. 123-456) or hexadecimal ' + '(e.g. 0x123-0xabc) range.' % r) + + # Check if first <= last. + ranges = self.args.selected_ranges.split('-') + # parser line into array + if int(ranges[0]) > int(ranges[1]): + Utils.print_error( + 'Range \'%s\' in option --select-ranges is not a valid range: ' + 'second address must be greater or equal than the first one.' 
% ranges) + + def print_warning_if_decompiling_bytecode(self): + """Prints a warning if we are decompiling bytecode.""" + + cmd = CmdRunner() + bytecode, _, _ = cmd.run_cmd([config.CONFIGTOOL, self.config, '--read', '--bytecode']) + # bytecode = os.popen('\'' + config.CONFIGTOOL + '\' \'' + CONFIG + '\' --read --bytecode').read().rstrip('\n') + + if bytecode != '': + Utils.print_warning( + 'Detected %s bytecode, which cannot be decompiled by our machine-code decompiler.' + ' The decompilation result may be inaccurate.' % bytecode) + + def check_whether_decompilation_should_be_forcefully_stopped(self, tool_name): + """Checks whether the decompilation should be forcefully stopped because of the + --stop-after parameter. If so, self.cleanup is run and the script exits with 0. + + Arguments: + + $1 Name of the tool. + + The function expects the $STOP_AFTER variable to be set. + """ + + if self.args.stop_after == tool_name: + if self.args.generate_log: + self.generate_log() + + self.cleanup() + print() + print('#### Forced stop due to - -stop - after %s...' % self.args.stop_after) + # sys.exit(0) + + def cleanup(self): + """Cleanup working directory""" + + if self.args.self.cleanup: + Utils.remove_forced(self.out_unpacked) + Utils.remove_forced(self.out_frontend_ll) + Utils.remove_forced(self.out_frontend_bc) + + if self.config != self.args.config: + Utils.remove_forced(self.config) + + Utils.remove_forced(self.out_backend_bc) + Utils.remove_forced(self.out_backend_ll) + Utils.remove_forced(self.out_restored) + + # Archive support + Utils.remove_forced(self.out_archive) + + # Archive support (Macho-O Universal) + for sig in self.signatures_to_remove: + Utils.remove_forced(sig) + + # Signatures generated from archives + if self.TOOL_LOG_FILE: + Utils.remove_forced(self.TOOL_LOG_FILE) + + def generate_log(self): + global LOG_DECOMPILATION_END_DATE + global LOG_FILEINFO_OUTPUT + global LOG_UNPACKER_OUTPUT + global LOG_BIN2LLVMIR_OUTPUT + global LOG_LLVMIR2HLL_OUTPUT + global LOG_DECOMPILATION_START_DATE + global FORMAT + global LOG_FILEINFO_RC + global LOG_UNPACKER_RC + global LOG_BIN2LLVMIR_RC + global LOG_LLVMIR2HLL_RC + global LOG_FILEINFO_RUNTIME + global LOG_BIN2LLVMIR_RUNTIME + global LOG_LLVMIR2HLL_RUNTIME + global LOG_FILEINFO_MEMORY + global LOG_BIN2LLVMIR_MEMORY + global LOG_LLVMIR2HLL_MEMORY + + LOG_FILE = self.output + '.decompilation.log' + LOG_DECOMPILATION_END_DATE = time.strftime('%S') + + LOG_FILEINFO_OUTPUT = self.json_escape(LOG_FILEINFO_OUTPUT) + LOG_UNPACKER_OUTPUT = self.json_escape(LOG_UNPACKER_OUTPUT) + LOG_BIN2LLVMIR_OUTPUT = self.remove_colors(LOG_BIN2LLVMIR_OUTPUT) + LOG_BIN2LLVMIR_OUTPUT = self.json_escape(LOG_BIN2LLVMIR_OUTPUT) + LOG_LLVMIR2HLL_OUTPUT = self.remove_colors(LOG_LLVMIR2HLL_OUTPUT) + LOG_LLVMIR2HLL_OUTPUT = self.json_escape(LOG_LLVMIR2HLL_OUTPUT) + + log_structure = '{\n\t\'input_file\' : \'%s\',\n\t\'pdb_file\' : \'%s\',\n\t\'start_date\' :' \ + ' \'%s\',\n\t\'end_date\' : \'%s\',\n\t\'mode\' : \'%s\',\n\t\'arch\' : \'%s\',\n\t\'format\'' \ + ' : \'%s\',\n\t\'fileinfo_rc\' : \'%s\',\n\t\'unpacker_rc\' : \'%s\',\n\t\'bin2llvmir_rc\'' \ + ' : \'%s\',\n\t\'llvmir2hll_rc\' : \'%s\',\n\t\'fileinfo_output\' :' \ + ' \'%s\',\n\t\'unpacker_output\' : \'%s\',\n\t\'bin2llvmir_output\' :' \ + ' \'%s\',\n\t\'llvmir2hll_output\' : \'%s\',\n\t\'fileinfo_runtime\' :' \ + ' \'%s\',\n\t\'bin2llvmir_runtime\' : \'%s\',\n\t\'llvmir2hll_runtime\' :' \ + ' \'%s\',\n\t\'fileinfo_memory\' : \'%s\',\n\t\'bin2llvmir_memory\' :' \ + ' \'%s\',\n\t\'llvmir2hll_memory\' : 
\'%s\'\n}\n' + + print(log_structure % ( + self.input, self.args.pdb, LOG_DECOMPILATION_START_DATE, LOG_DECOMPILATION_END_DATE, self.args.mode, + self.args.arch, + FORMAT, LOG_FILEINFO_RC, LOG_UNPACKER_RC, LOG_BIN2LLVMIR_RC, LOG_LLVMIR2HLL_RC, + LOG_FILEINFO_OUTPUT, LOG_UNPACKER_OUTPUT, LOG_BIN2LLVMIR_OUTPUT, LOG_LLVMIR2HLL_OUTPUT, + LOG_FILEINFO_RUNTIME, LOG_BIN2LLVMIR_RUNTIME, LOG_LLVMIR2HLL_RUNTIME, LOG_FILEINFO_MEMORY, + LOG_BIN2LLVMIR_MEMORY, LOG_LLVMIR2HLL_MEMORY)) + + # + # Parses the given return code and output from a tool that was run through + # `/usr/bin/time -v` and prints the return code to be stored into the log. + # + # Parameters: + # + # - $1: return code from `/usr/bin/time` + # - $2: combined output from the tool and `/usr/bin/time -v` + # + # This function has to be called for every tool that is run through + # `/usr/bin/time`. The reason is that when a tool is run without + # `/usr/bin/time` and it e.g. segfaults, shell returns 139, but when it is run + # through `/usr/bin/time`, it returns 11 (139 - 128). If this is the case, this + # function prints 139 instead of 11 to make the return codes of all tools + # consistent. + # + def get_tool_rc(self, return_code, output): + global BASH_REMATCH + global RC + + orig_rc = return_code + signal_regex = 'Command terminated by signal ([0-9]*)' + + if re.search(signal_regex, output): + signal_num = BASH_REMATCH[1] + RC = signal_num + 128 + else: + RC = orig_rc + # We want to be able to distinguish assertions and memory-insufficiency + # errors. The problem is that both assertions and memory-insufficiency + # errors make the program exit with return code 134. We solve this by + # replacing 134 with 135 (SIBGUS, 7) when there is 'std::bad_alloc' in the + # output. So, 134 will mean abort (assertion error) and 135 will mean + # memory-insufficiency error. + if RC == 134 or re.search('std::bad_alloc', output): + RC = 135 + print(RC) + + return RC + + # + # Parses the given output ($1) from a tool that was run through + # `/usr/bin/time -v` and prints the running time in seconds. + # + def get_tool_runtime(self, output): + global USER_TIME_F + global SYSTEM_TIME_F + global RUNTIME_F + + # The output from `/usr/bin/time -v` looks like this: + # + # [..] (output from the tool) + # Command being timed: 'tool' + # User time (seconds): 0.04 + # System time (seconds): 0.00 + # [..] (other data) + # + # We combine the user and system times into a single time in seconds. + USER_TIME_F = os.popen('egrep \'User time \\(seconds\\').read().rstrip('\n') + ': <<< ' + ( + output) + ' | cut -d: -f2)' + + SYSTEM_TIME_F = os.popen('egrep \'System time \\(seconds\\').read().rstrip('\n') + ': <<< ' + ( + output) + ' | cut -d: -f2)' + RUNTIME_F = os.popen('echo ' + USER_TIME_F + ' + ' + SYSTEM_TIME_F + ' | bc').read().rstrip('\n') + # Convert the runtime from float to int (http://unix.stackexchange.com/a/89843). + # By adding 1, we make sure that the runtime is at least one second. This + # also takes care of proper rounding (we want to round runtime 1.1 to 2). + _rc0 = _rcr2, _rcw2 = os.pipe() + if os.fork(): + os.close(_rcw2) + os.dup2(_rcr2, 0) + subprocess.call(['bc'], shell=True) + else: + os.close(_rcr2) + os.dup2(_rcw2, 1) + print('(' + RUNTIME_F + ' + 1)/1') + # sys.exit(0) + + return RUNTIME_F + + # + # Parses the given output ($1) from a tool that was run through + # `/usr/bin/time -v` and prints the memory usage in MB. 
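The runtime/memory helpers around this point still splice egrep/cut/bc shell fragments into os.popen() calls, which does not run as Python. A pure-Python sketch of parsing the `/usr/bin/time -v` output described in the comments (illustrative, not part of the patch):

# Editorial sketch: parse `/usr/bin/time -v` output without shelling out.
import re

def parse_time_v_output(output):
    """Return (runtime_seconds, memory_mb) from `/usr/bin/time -v` output."""
    user = re.search(r'User time \(seconds\): ([0-9.]+)', output)
    system = re.search(r'System time \(seconds\): ([0-9.]+)', output)
    rss = re.search(r'Maximum resident set size \(kbytes\): ([0-9]+)', output)
    # Adding 1 mirrors the original rounding: 1.1 s becomes 2 s and the
    # result is always at least one second.
    runtime = int(float(user.group(1)) + float(system.group(1)) + 1) if user and system else 0
    memory_mb = max(int(rss.group(1)) // 1024, 1) if rss else 0
    return runtime, memory_mb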
+ # + def get_tool_memory_usage(self, tool): + global RSS_KB + global RSS_MB + + """The output from `/usr/bin/time -v` looks like this: + + [..] (output from the tool) + Command being timed: 'tool' + [..] (other data) + Maximum resident set size (kbytes): 1808 + [..] (other data) + + We want the value of 'resident set size' (RSS), which we convert from KB + to MB. If the resulting value is less than 1 MB, round it to 1 MB. + """ + RSS_KB = os.popen('egrep \'Maximum resident set size \\(kbytes\\').read().rstrip('\n') + ': <<< ' + ( + tool) + ' | cut -d: -f2)' + + RSS_MB = (RSS_KB // 1024) + print((RSS_MB if (RSS_MB > 0) else 1)) + + return RSS_MB + + # + # Prints the actual output of a tool that was run through `/usr/bin/time -v`. + # The parameter ($1) is the combined output from the tool and `/usr/bin/time -v`. + # + def get_tool_output(self, output): + # The output from `/usr/bin/time -v` looks either like this (success): + # + # [..] (output from the tool) + # Command being timed: 'tool' + # [..] (other data) + # + # or like this (when there was an error): + # + # [..] (output from the tool) + # Command exited with non-zero status X + # [..] (other data) + # + # Remove everything after and including 'Command...' + # (http://stackoverflow.com/a/5227429/2580955). + _rcr1, _rcw1 = os.pipe() + if os.fork(): + os.close(_rcw1) + os.dup2(_rcr1, 0) + subprocess.call(['sed', '-n', '/Command exited with non-zero status/q;p'], shell=True) + else: + os.close(_rcr1) + os.dup2(_rcw1, 1) + _rc0 = subprocess.Popen('sed' + ' ' + '-n' + ' ' + '/Command being timed:/q;p', shell=True, + stdin=subprocess.PIPE) + _rc0.communicate(output + '\n') + + return _rc0.wait() + # sys.exit(0) + + # + # Prints an escaped version of the given text so it can be inserted into JSON. + # + # Parameters: + # - $1 Text to be escaped. + # + def json_escape(self, text): + # We need to escape backslashes (\), double quotes ('), and replace new lines with '\n'. + + return re.escape(text) + + def remove_colors(self, text): + """Removes color codes from the given text ($1). + """ + # _rc0 = subprocess.Popen('sed' + ' ' + '-r' + ' ' + 's/\x1b[^m]*m//g', shell=True, stdin=subprocess.PIPE) + + res = re.compile(r's/\x1b[^m]*m//g') + return res.sub('', text) + + def timed_kill(self, pid): + """Platform-independent alternative to `ulimit -t` or `timeout`. + Based on http://www.bashcookbook.com/bashinfo/source/bash-4.0/examples/scripts/timeout3 + 1 argument is needed - PID + Returns - 1 if number of arguments is incorrect + 0 otherwise + """ + + global TIMEOUT + global timeout + + PID = pid + # PID of the target process + PROCESS_NAME = os.popen('ps -p ' + PID + ' -o comm --no-heading').read().rstrip('\n') + + if PROCESS_NAME == 'time': + # The program is run through `/usr/bin/time`, so get the PID of the + # child process (the actual program). Otherwise, if we killed + # `/usr/bin/time`, we would obtain no output from it (user time, memory + # usage etc.). + PID = os.popen('ps --ppid ' + PID + ' -o pid --no-heading | head -n1').read().rstrip('\n') + + if not TIMEOUT: + TIMEOUT = 300 + + timeout = TIMEOUT + t = timeout + + while t > 0: + time.sleep(1) + + if not subprocess.call(['kill', '-0', PID], shell=True, stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL): + exit(0) + + t = t - 1 + + subprocess.call(['kill_tree', PID, 'SIGKILL'], shell=True, stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL) + + return 0 + + # + # Kill process and all its children. 
+ # Based on http://stackoverflow.com/questions/392022/best-way-to-kill-all-child-processes/3211182#3211182 + # 2 arguments are needed - PID of process to kill + signal type + # Returns - 1 if number of arguments is incorrect + # 0 otherwise + # + def kill_tree(self, pid, signal_type): + """ TODO implement + _pid = pid + _sig = Expand.colonMinus('2', 'TERM') + _rc0 = subprocess.call(['kill', '-stop', Expand.underbar() + 'pid'], shell=True) + + # needed to stop quickly forking parent from producing child between child killing and parent killing + for _child in os.popen('ps -o pid --no-headers --ppid \'' + Expand.underbar() + 'pid\'').read().rstrip('\n'): + kill_tree(Expand.underbar() + 'child', Expand.underbar() + 'sig') + _rc0 = subprocess.call(['kill', '-' + Expand.underbar() + 'sig', Expand.underbar() + 'pid'], shell=True) + """ + + return 0 + + def string_to_md5(self, string): + """Generate a MD5 checksum from a given string. + """ + m = hashlib.md5() + m.update(string) + + return m.hexdigest() + + def decompile(self): + global TIME + # Check arguments and set default values for unset options. + self.check_arguments() + + # Initialize variables used by logging. + if self.args.generate_log: + LOG_DECOMPILATION_START_DATE = time.strftime('%s') # os.popen('date + %s').read().rstrip('\n') + # Put the tool log file and tmp file into /tmp because it uses tmpfs. This means that + # the data are stored in RAM instead on the disk, which should provide faster access. + tmp_dir = '/tmp/decompiler_log' + + os.makedirs(tmp_dir, exist_ok=True) + + file_md5 = self.string_to_md5(self.output) + tool_log_file = tmp_dir + '/' + file_md5 + '.tool' + + # Raw. + if self.args.mode == 'raw': + # Entry point for THUMB must be odd. + if self.args.arch == 'thumb' or (self.args.raw_entry_point % 2) != 0: + self.args.keep_unreachable_funcs = 1 + # RAW_ENTRY_POINT = (RAW_ENTRY_POINT + 1) + + # Check for archives. + if self.args.mode == 'bin': + # Check for archives packed in Mach-O Universal Binaries. + print('##### Checking if file is a Mach-O Universal static library...') + print('RUN: ' + config.EXTRACT + ' --list ' + self.input) + + if Utils.is_macho_archive(self.input): + out_archive = self.output + '.a' + if self.args.arch: + print() + print('##### Restoring static library with architecture family ' + self.args.arch + '...') + print( + 'RUN: ' + config.EXTRACT + ' --family ' + self.args.arch + ' --out ' + out_archive + ' ' + self.input) + if ( + not subprocess.call( + [config.EXTRACT, '--family', self.args.arch, '--out', out_archive, self.input], + shell=True)): + # Architecture not supported + print( + 'Invalid --arch option \'' + self.args.arch + '\'. File contains these architecture families:') + subprocess.call([config.EXTRACT, '--list', self.input], shell=True) + self.cleanup() + sys.exit(1) + else: + # Pick best architecture + print() + print('##### Restoring best static library for decompilation...') + print('RUN: ' + config.EXTRACT + ' --best --out ' + out_archive + ' ' + self.input) + subprocess.call([config.EXTRACT, '--best', '--out', out_archive, self.input], shell=True) + self.input = out_archive + + print() + print('##### Checking if file is an archive...') + print('RUN: ' + config.AR + ' --arch-magic ' + self.input) + + if Utils.has_archive_signature(self.input): + print('This file is an archive!') + + # Check for thin signature. 
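For the thin-archive check that follows: regular `ar` archives begin with the magic string `!<arch>\n`, while GNU thin archives begin with `!<thin>\n`, so the signature test reduces to reading the first eight bytes. A minimal sketch (illustrative; the real check lives in retdec_utils.py):

# Editorial sketch: detect a GNU thin archive by its global header magic.
def is_thin_archive(path):
    with open(path, 'rb') as f:
        return f.read(8) == b'!<thin>\n'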
+ if Utils.has_thin_archive_signature(self.input): + self.cleanup() + Utils.print_error('File is a thin archive and cannot be decompiled.') + + # Check if our tools can handle it. + if not Utils.is_valid_archive(self.input): + self.cleanup() + Utils.print_error('The input archive has invalid format.') + + # Get and check number of objects. + arch_object_count = Utils.archive_object_count(self.input) + if arch_object_count <= 0: + self.cleanup() + Utils.print_error('The input archive is empty.') + + # Prepare object output path. + out_restored = self.output + '.restored' + + # Pick object by index. + if self.args.ar_index: + print() + print('##### Restoring object file on index '' + (self.args.ar_index) + '' from archive...') + print( + 'RUN: ' + config.AR + ' ' + self.input + ' --index ' + self.args.ar_index + ' --output ' + out_restored) + + if not Utils.archive_get_by_index(self.input, self.args.ar_index, out_restored): + self.cleanup() + valid_index = (arch_object_count - 1) + + if valid_index != 0: + Utils.print_error('File on index \'' + ( + self.args.ar_index) + '\' was not found in the input archive. Valid indexes are 0-' + ( + valid_index) + '.') + else: + Utils.print_error('File on index \'' + ( + self.args.ar_index) + '\' was not found in the input archive. The only valid index is 0.') + self.input = out_restored + elif self.args.ar_name: + print() + print('##### Restoring object file with name '' + (self.args.ar_name) + '' from archive...') + print( + 'RUN: ' + config.AR + ' ' + self.input + ' --name ' + self.args.ar_name + ' --output ' + out_restored) + + if not Utils.archive_get_by_name(self.input, self.args.ar_name, out_restored): + self.cleanup() + Utils.print_error('File named %s was not found in the input archive.' % self.args.ar_name) + self.input = out_restored + else: + # Print list of files. + print('Please select file to decompile with either \' --ar-index = n\'') + print('or \' --ar-name = string\' option. Archive contains these files:') + + Utils.archive_list_numbered_content(self.input) + self.cleanup() + sys.exit(1) + else: + if self.args.ar_name: + Utils.print_warning('Option --ar-name can be used only with archives.') + + if self.args.ar_index: + Utils.print_warning('Option --ar-index can be used only with archives.') + + print('Not an archive, going to the next step.') + + if self.args.mode == 'bin' or self.args.mode == 'raw': + # Assignment of other used variables. + name = os.path.splitext(self.output)[0] + self.out_unpacked = name + '-unpacked' + out_frontend = self.output + '.frontend' + self.out_frontend_ll = out_frontend + '.ll' + self.out_frontend_bc = out_frontend + '.bc' + self.config = self.output + '.json' + + if self.config != self.args.config: + Utils.remove_forced(self.config) + + if self.args.config: + shutil.copyfile(self.args.config, self.config) + + # Preprocess existing file or create a new, empty JSON file. + if os.path.isfile(self.config): + subprocess.call([config.CONFIGTOOL, self.config, '--preprocess'], shell=True) + else: + print('{}', file=open(self.config, 'wb')) + + # Raw data needs architecture, endianess and optionaly sections's vma and entry point to be specified. 
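A note on the `subprocess.call([...], shell=True)` pattern used for the config-tool invocations below (and throughout this script): on POSIX, passing an argument list together with shell=True runs only the first list element as the command and feeds the remaining items to the shell as positional parameters, so the tool's arguments are silently dropped. A hedged sketch of the safer form (illustrative, not part of the patch):

# Editorial sketch: call the config tool without shell=True so every argument
# reaches the tool; configtool/config_path stand in for the script's values.
import subprocess

def write_config_value(configtool, config_path, key, value):
    subprocess.check_call([configtool, config_path, '--write', key, value])

# e.g. write_config_value(config.CONFIGTOOL, json_config, '--format', 'raw')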
+ if self.args.mode == 'raw': + if not self.args.arch or self.args.arch == 'unknown' or self.args.arch == '': + Utils.print_error('Option -a|--arch must be used with mode ' + self.args.mode) + + if not self.args.endian: + Utils.print_error('Option -e|--endian must be used with mode ' + self.args.mode) + + subprocess.call([config.CONFIGTOOL, self.config, '--write', '--format', 'raw'], shell=True) + subprocess.call([config.CONFIGTOOL, self.config, '--write', '--arch', self.args.arch], shell=True) + subprocess.call([config.CONFIGTOOL, self.config, '--write', '--bit-size', '32'], shell=True) + subprocess.call([config.CONFIGTOOL, self.config, '--write', '--file-class', '32'], shell=True) + subprocess.call([config.CONFIGTOOL, self.config, '--write', '--endian', self.args.endian], shell=True) + + if self.args.raw_entry_point: + subprocess.call( + [config.CONFIGTOOL, self.config, '--write', '--entry-point', self.args.raw_entry_point], + shell=True) + + if self.args.raw_section_vma: + subprocess.call( + [config.CONFIGTOOL, self.config, '--write', '--section-vma', self.args.raw_section_vma], + shell=True) + + # + # Call fileinfo to create an initial config file. + # + fileinfo_params = ['-c', self.config, '--similarity', self.input, '--no-hashes=all'] + + if self.args.fileinfo_verbose: + fileinfo_params.extend(['-c', self.config, '--similarity', '--verbose', self.input]) + + for par in config.FILEINFO_EXTERNAL_YARA_PRIMARY_CRYPTO_DATABASES: + fileinfo_params.extend(['--crypto', par]) + + if self.args.fileinfo_use_all_external_patterns: + for par in config.FILEINFO_EXTERNAL_YARA_EXTRA_CRYPTO_DATABASES: + fileinfo_params.extend(['--crypto', par]) + + if self.args.max_memory: + fileinfo_params.extend(['--max-memory', self.args.max_memory]) + elif not self.args.no_memory_limit: + # By default, we want to limit the memory of fileinfo into half of + # system RAM to prevent potential black screens on Windows (#270). + fileinfo_params.append('--max-memory-half-ram') + + print() + print('##### Gathering file information...') + print('RUN: ' + config.FILEINFO + ' ' + ' '.join(fileinfo_params)) + + if self.args.generate_log != '': + FILEINFO_AND_TIME_OUTPUT = os.popen(TIME + ' \'' + config.FILEINFO + '\' \'' + + ' '.join(fileinfo_params) + '\' 2>&1').read().rstrip('\n') + + fileinfo_rc = 0 # _rc0 #TODO use fileinfo rc + LOG_FILEINFO_RC = self.get_tool_rc(fileinfo_rc, FILEINFO_AND_TIME_OUTPUT) + + LOG_FILEINFO_RUNTIME = self.get_tool_runtime(FILEINFO_AND_TIME_OUTPUT) + LOG_FILEINFO_MEMORY = self.get_tool_memory_usage(FILEINFO_AND_TIME_OUTPUT) + LOG_FILEINFO_OUTPUT = self.get_tool_output(FILEINFO_AND_TIME_OUTPUT) + print(LOG_FILEINFO_OUTPUT) + else: + fileinfo_rc = subprocess.call([config.FILEINFO, ' '.join(fileinfo_params)], shell=True) + + if fileinfo_rc != 0: + if self.args.generate_log: + self.generate_log() + + self.cleanup() + # The error message has been already reported by fileinfo in stderr. + Utils.print_error('') + + self.check_whether_decompilation_should_be_forcefully_stopped('fileinfo') + + ## + ## Unpacking. + ## + unpack_params = ['--extended-exit-codes', '--output', self.out_unpacked, self.input] + + if self.args.max_memory: + unpack_params.extend(['--max-memory', self.args.max_memory]) + elif not self.args.no_memory_limit: + # By default, we want to limit the memory of retdec-unpacker into half + # of system RAM to prevent potential black screens on Windows (#270). 
+ unpack_params.append('--max-memory-half-ram') + + unpacker = Unpacker(unpack_params) + if self.args.generate_log: + # we should get the output from the unpacker tool + log_unpacker_output, unpacker_rc = unpacker.unpack_all() + LOG_UNPACKER_RC = unpacker_rc + else: + _, unpacker_rc = unpacker.unpack_all() + + self.check_whether_decompilation_should_be_forcefully_stopped('unpacker') + + # RET_UNPACK_OK=0 + # RET_UNPACKER_NOTHING_TO_DO_OTHERS_OK=1 + # RET_NOTHING_TO_DO=2 + # RET_UNPACKER_FAILED_OTHERS_OK=3 + # RET_UNPACKER_FAILED=4 + if unpacker_rc == 0 or unpacker_rc == 1 or unpacker_rc == 3: + # Successfully unpacked -> re-run fileinfo to obtain fresh information. + self.input = self.out_unpacked + fileinfo_params = ['-c', self.config, '--similarity', self.input, '--no-hashes=all'] + + if self.args.fileinfo_verbose: + fileinfo_params = ['-c', self.config, '--similarity', '--verbose', self.input] + + for pd in config.FILEINFO_EXTERNAL_YARA_PRIMARY_CRYPTO_DATABASES: + fileinfo_params.extend(['--crypto ', pd]) + + if self.args.fileinfo_use_all_external_patterns: + for ed in config.FILEINFO_EXTERNAL_YARA_EXTRA_CRYPTO_DATABASES: + fileinfo_params.extend(['--crypto ', ed]) + + if self.args.max_memory: + fileinfo_params.extend(['--max-memory', self.args.max_memory]) + elif not self.args.no_memory_limit: + # By default, we want to limit the memory of fileinfo into half of + # system RAM to prevent potential black screens on Windows (#270). + fileinfo_params.append('--max-memory-half-ram') + + print() + print('##### Gathering file information after unpacking...') + print('RUN: ' + config.FILEINFO + ' ' + ' '.join(fileinfo_params)) + + if self.args.generate_log: + FILEINFO_AND_TIME_OUTPUT = os.popen( + TIME + ' \'' + config.FILEINFO + '\' \'' + ' '.join(fileinfo_params) + '\' 2>&1').read().rstrip( + '\n') + + fileinfo_rc = 0 # _rc0 + + LOG_FILEINFO_RC = self.get_tool_rc(fileinfo_rc, FILEINFO_AND_TIME_OUTPUT) + FILEINFO_RUNTIME = self.get_tool_runtime(FILEINFO_AND_TIME_OUTPUT) + LOG_FILEINFO_RUNTIME = (LOG_FILEINFO_RUNTIME + FILEINFO_RUNTIME) + FILEINFO_MEMORY = self.get_tool_memory_usage(FILEINFO_AND_TIME_OUTPUT) + LOG_FILEINFO_MEMORY = (LOG_FILEINFO_MEMORY + FILEINFO_MEMORY) / 2 + LOG_FILEINFO_OUTPUT = self.get_tool_output(FILEINFO_AND_TIME_OUTPUT) + print(LOG_FILEINFO_OUTPUT) + else: + fileinfo_rc = subprocess.call([config.FILEINFO, ' '.join(fileinfo_params)], shell=True) + + if fileinfo_rc != 0: + if self.args.generate_log: + self.generate_log() + + self.cleanup() + # The error message has been already reported by fileinfo in stderr. + Utils.print_error('') + + self.print_warning_if_decompiling_bytecode() + + # Check whether the architecture was specified. + if self.args.arch: + subprocess.call([config.CONFIGTOOL, self.config, '--write', '--arch', self.args.arch], shell=True) + else: + # Get full name of the target architecture including comments in parentheses + ARCH_FULL = os.popen( + '\'' + config.CONFIGTOOL + '\' \'' + self.config + '\' --read --arch | awk \'{print tolower($0').read().rstrip( + '\n') + + # Strip comments in parentheses and all trailing whitespace + # todo (ARCH_FULL % (*) what is this + self.args.arch = ARCH_FULL # os.popen('echo ' + (ARCH_FULL % (*) + ' | sed -e s / ^ [[: space:]] * // \'').read().rstrip('\n') + + # Get object file format. 
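The `os.popen(... | awk ...)` one-liners below (architecture and object file format) are truncated shell fragments carried over from the Bash original. A sketch of the equivalent pure-Python read, assuming `config.CONFIGTOOL` accepts `--read --arch`/`--read --format` as it does elsewhere in this script:

# Editorial sketch: read one value from the JSON config via the config tool,
# lower-cased and stripped (replaces the awk/sed post-processing).
import subprocess

def read_config_value(configtool, config_path, key):
    out = subprocess.check_output([configtool, config_path, '--read', key],
                                  universal_newlines=True)
    return out.strip().lower()

# e.g. fmt = read_config_value(config.CONFIGTOOL, json_config, '--format')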
+ FORMAT = os.popen( + '\'' + config.CONFIGTOOL + '\' \'' + self.config + '\' --read --format | awk \'{print tolower($1').read().rstrip( + '\n') + + # Intel HEX needs architecture to be specified + if FORMAT == 'ihex': + if not self.args.arch or self.args.arch == 'unknown': + Utils.print_error('Option -a|--arch must be used with format ' + FORMAT) + + if not self.args.endian: + Utils.print_error('Option -e|--endian must be used with format ' + FORMAT) + + subprocess.call([config.CONFIGTOOL, self.config, '--write', '--arch', self.args.arch], shell=True) + subprocess.call([config.CONFIGTOOL, self.config, '--write', '--bit-size', '32'], shell=True) + subprocess.call([config.CONFIGTOOL, self.config, '--write', '--file-class', '32'], shell=True) + subprocess.call([config.CONFIGTOOL, self.config, '--write', '--endian', self.args.endian], shell=True) + + # Check whether the correct target architecture was specified. + if self.args.arch == 'arm' or self.args.arch == 'thumb': + ords_dir = config.ARM_ORDS_DIR + elif self.args.arch == 'x86': + ords_dir = config.X86_ORDS_DIR + elif self.args.arch == 'powerpc' or self.args.arch == 'mips' or self.args.arch == 'pic32': + pass + else: + # nothing + if self.args.generate_log: + self.generate_log() + + self.cleanup() + Utils.print_error( + 'Unsupported target architecture %s. Supported architectures: Intel x86, ARM, ARM + Thumb, MIPS, PIC32, PowerPC.' % self.args.arch) + + # Check file class (e.g. 'ELF32', 'ELF64'). At present, we can only decompile 32-bit files. + # Note: we prefer to report the 'unsupported architecture' error (above) than this 'generic' error. + cmd = CmdRunner() + fileclass, _, _ = cmd.run_cmd([config.CONFIGTOOL, self.config, '--read', '--file-class']) + # FILECLASS = os.popen(config.CONFIGTOOL + ' ' + CONFIG + ' --read --file-class').read().rstrip('\n') + + if fileclass != '16' or fileclass != '32': + if self.args.generate_log: + self.generate_log() + + self.cleanup() + Utils.print_error( + 'Unsupported target format '' + (FORMAT^^) + (FILECLASS) + ''. Supported formats: ELF32, PE32, Intel HEX 32, Mach-O 32.') + + # Set path to statically linked code signatures. + # + # TODO: Using ELF for IHEX is ok, but for raw, we probably should somehow decide between ELF and PE, or use both, for RAW. + sig_format = FORMAT + + if sig_format == 'ihex' or sig_format == 'raw': + sig_format = 'elf' + + endian_result, _, _ = cmd.run_cmd([config.CONFIGTOOL, self.config, '--read', '--endian']) + # ENDIAN = os.popen(config.CONFIGTOOL + ' ' + CONFIG + ' --read --endian').read().rstrip('\n') + + if endian_result == 'little': + sig_endian = 'le' + elif endian_result == 'big': + sig_endian = 'be' + else: + sig_endian = '' + + sig_arch = self.args.arch + + if sig_arch == 'pic32': + sig_arch = 'mips' + + signatures_dir = os.path.join(config.GENERIC_SIGNATURES_DIR, sig_format, fileclass, sig_endian) + # SIGNATURES_DIR = config.GENERIC_SIGNATURES_DIR + '/' + SIG_FORMAT + '/' + FILECLASS + '/' + SIG_ENDIAN + '/' + ( + # SIG_ARCH) + + self.print_warning_if_decompiling_bytecode() + + # Decompile unreachable functions. + if self.args.keep_unreachable_funcs: + subprocess.call([config.CONFIGTOOL, self.config, '--write', '--keep-unreachable-funcs', 'true'], + shell=True) + + # Get signatures from selected archives. 
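In the signature-extraction loop that follows, the archive name is still sanitized through `basename | sed` via os.popen, and the integer `lib_index` is concatenated into the output name without str(). A pure-Python sketch of the same sanitization (illustrative, not part of the patch):

# Editorial sketch: derive a filesystem-safe signature-file name, mirroring
# `basename ... | sed -e 's/[^A-Za-z0-9_.-]/_/g'`.
import os
import re

def crop_arch_path(lib_path):
    return re.sub(r'[^A-Za-z0-9_.-]', '_', os.path.basename(lib_path))

# e.g. output + '.' + crop_arch_path(lib) + '.' + str(lib_index) + '.yara'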
+ if len(self.args.static_code_archive) > 0: + print() + print('##### Extracting signatures from selected archives...') + + lib_index = 0 + for lib in self.args.static_code_archive: + + print('Extracting signatures from file \'%s\'', lib) + CROP_ARCH_PATH = os.popen( + 'basename \'' + lib + '\' | LC_ALL=C sed -e \'s/[^A-Za-z0-9_.-]/_/g\'').read().rstrip('\n') + sig_out = self.output + '.' + CROP_ARCH_PATH + '.' + lib_index + '.yara' + + # if (subprocess.call(config.SIG_FROM_LIB + ' ' + lib + ' ' + '--output' + ' ' + SIG_OUT, shell=True, + # stderr=subprocess.STDOUT, stdout=subprocess.DEVNULL)): + # Call sig from lib tool + sig_from_lib = SigFromLib([lib, '--output ' + sig_out]) + if sig_from_lib.run(): + subprocess.call([config.CONFIGTOOL, self.config, '--write', '--user-signature', sig_out], + shell=True) + signatures_to_remove = [sig_out] + else: + Utils.print_warning('Failed extracting signatures from file \'' + lib + '\'') + + lib_index += 1 + + # Store paths of signature files into config for frontend. + if not self.args.no_default_static_signatures: + subprocess.call([config.CONFIGTOOL, self.config, '--write', '--signatures', signatures_dir], shell=True) + + # User provided signatures. + for i in self.args.static_code_sigfile: + subprocess.call([config.CONFIGTOOL, self.config, '--write', '--user-signature', i], shell=True) + + # Store paths of type files into config for frontend. + # TODO doesnt even exist in sh except here + # if os.path.isdir(GENERIC_TYPES_DIR): + # subprocess.call([config.CONFIGTOOL, CONFIG, '--write', '--types', GENERIC_TYPES_DIR], shell=True) + + # Store path of directory with ORD files into config for frontend (note: only directory, not files themselves). + if os.path.isdir(ords_dir): + subprocess.call([config.CONFIGTOOL, self.config, '--write', '--ords', ords_dir + '/'], shell=True) + + # Store paths to file with PDB debugging information into config for frontend. + if os.path.exists(self.args.pdb): + subprocess.call([config.CONFIGTOOL, self.config, '--write', '--pdb-file', self.args.pdb], shell=True) + + # Store file names of input and output into config for frontend. + subprocess.call([config.CONFIGTOOL, self.config, '--write', '--input-file', self.input], shell=True) + subprocess.call([config.CONFIGTOOL, self.config, '--write', '--frontend-output-file', self.out_frontend_ll], + shell=True) + subprocess.call([config.CONFIGTOOL, self.config, '--write', '--output-file', self.output], shell=True) + + # Store decode only selected parts flag. + if self.args.selected_decode_only: + subprocess.call([config.CONFIGTOOL, self.config, '--write', '--decode-only-selected', 'true'], + shell=True) + else: + subprocess.call([config.CONFIGTOOL, self.config, '--write', '--decode-only-selected', 'false'], + shell=True) + + # Store selected functions or selected ranges into config for frontend. + if self.args.selected_functions: + for f in self.args.selected_functions: + subprocess.call([config.CONFIGTOOL, self.config, '--write', '--selected-func', f], shell=True) + + if self.args.selected_ranges: + for r in self.args.selected_ranges: + subprocess.call([config.CONFIGTOOL, self.config, '--write', '--selected-range', r], shell=True) + + # Assignment of other used variables. + # We have to ensure that the .bc version of the decompiled .ll file is placed + # in the same directory as are other output files. Otherwise, there may be + # race-condition problems when the same input .ll file is decompiled in + # parallel processes because they would overwrite each other's .bc file. 
This + # is most likely to happen in regression tests in the 'll' mode. + out_backend = self.output + '.backend' + + # If the input file is the same as $OUT_BACKEND_LL below, then we have to change the name of + # $OUT_BACKEND. Otherwise, the input file would get overwritten during the conversion. + if self.out_frontend_ll == out_backend + '.ll': + out_backend = self.output + '.backend.backend' + + out_backend_bc = out_backend + '.bc' + OUT_BACKEND_LL = out_backend + '.ll' + + bin2llvmir_params = [] + ## + ## Decompile the binary into LLVM IR. + ## + if self.args.keep_unreachable_funcs: + # Prevent bin2llvmir from removing unreachable functions. + bin2llvmir_params = os.popen( + 'sed \' s / -unreachable-funcs * // g\' <<< \'' + config.BIN2LLVMIR_PARAMS + '\'').read().rstrip( + '\n') + + if not self.config and self.args.config: + self.config = self.args.config + bin2llvmir_params.extend(['-config-path', self.config]) + + if self.args.max_memory: + bin2llvmir_params.extend(['-max-memory', self.args.max_memory]) + elif not self.args.no_memory_limit: + # By default, we want to limit the memory of bin2llvmir into half of + # system RAM to prevent potential black screens on Windows (#270). + bin2llvmir_params.append('-max-memory-half-ram') + + print() + print('##### Decompiling ' + self.input + ' into ' + out_backend_bc + '...') + print('RUN: ' + config.BIN2LLVMIR + ' ' + ' '.join(bin2llvmir_params) + ' -o ' + out_backend_bc) + + if self.args.generate_log: + PID = 0 + bin2llvmir_rc = 0 + + def thread1(): + subprocess.call([TIME, config.BIN2LLVMIR, ' '.join(bin2llvmir_params), '-o', ' '.join( + out_backend_bc)], shell=True, stdout=open(tool_log_file, 'wb'), stderr=subprocess.STDOUT) + + threading.Thread(target=thread1).start() + + PID = 0 # TODO $! Expand.exclamation() + + def thread2(): + self.timed_kill(PID) + + threading.Thread(target=thread2).start() + + # subprocess.call(['wait', PID], shell=True, stderr=subprocess.STDOUT, stdout=subprocess.DEVNULL) + os.kill(PID, 0) + + bin2llvmir_rc = 0 # TODO use rc _rc2 + BIN2LLVMIR_AND_TIME_OUTPUT = os.popen('cat \'' + tool_log_file + '\'').read().rstrip('\n') + LOG_BIN2LLVMIR_RC = self.get_tool_rc(bin2llvmir_rc, BIN2LLVMIR_AND_TIME_OUTPUT) + LOG_BIN2LLVMIR_RUNTIME = self.get_tool_runtime(BIN2LLVMIR_AND_TIME_OUTPUT) + LOG_BIN2LLVMIR_MEMORY = self.get_tool_memory_usage(BIN2LLVMIR_AND_TIME_OUTPUT) + LOG_BIN2LLVMIR_OUTPUT = self.get_tool_output(BIN2LLVMIR_AND_TIME_OUTPUT) + print(LOG_BIN2LLVMIR_OUTPUT, end='') + else: + bin2llvmir_rc = subprocess.call([config.BIN2LLVMIR, ' '.join(bin2llvmir_params), '-o', out_backend_bc], + shell=True) + + if bin2llvmir_rc != 0: + if self.args.generate_log: + self.generate_log() + + self.cleanup() + Utils.print_error('Decompilation to LLVM IR failed') + + self.check_whether_decompilation_should_be_forcefully_stopped('bin2llvmir') + + # modes 'bin' || 'raw' + + # LL mode goes straight to backend. + if self.args.mode == 'll': + out_backend_bc = self.input + self.config = self.args.config + + # Create parameters for the $LLVMIR2HLL call. 
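In the parameter block that follows, `--backend-no-debug` appends `-enable-debug` and `--backend-no-debug-comments` appends `-emit-debug-comments`, which reads inverted relative to the option names and is worth re-checking against the Bash original. A small table-driven sketch for mapping boolean options onto backend flags (illustrative; the flag spellings are taken from the block below):

# Editorial sketch: map boolean argparse options to backend flags with a
# single table instead of a chain of ifs.
def build_flag_params(args):
    flag_map = {
        'backend_aggressive_opts': '-aggressive-opts',
        'backend_no_opts': '-no-opts',
        'backend_keep_all_brackets': '-keep-all-brackets',
    }
    return [flag for attr, flag in flag_map.items() if getattr(args, attr, False)]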
+ llvmir2hll_params = ['-target-hll=' + self.args.hll, '-var-renamer=' + ( + self.args.backend_var_renamer), '-var-name-gen=fruit', '-var-name-gen-prefix=', '-call-info-obtainer=' + ( + self.args.backend_call_info_obtainer), '-arithm-expr-evaluator=' + ( + self.args.backend_arithm_expr_evaluator), '-validate-module', + '-llvmir2bir-converter=' + ( + self.args.backend_llvmir2bir_converter), '-o', self.output, out_backend_bc] + + if self.args.backend_no_debug: + llvmir2hll_params.append('-enable-debug') + + if self.args.backend_no_debug_comments: + llvmir2hll_params.append('-emit-debug-comments') + + if self.args.config: + llvmir2hll_params.append('-config-path=' + self.args.config) + + if self.args.backend_semantics: + llvmir2hll_params.extend(['-semantics', self.args.backend_semantics]) + + if self.args.backend_enabled_opts: + llvmir2hll_params.append('-enabled-opts=' + self.args.backend_enabled_opts) + + if self.args.backend_disabled_opts: + llvmir2hll_params.append('-disabled-opts=' + self.args.backend_disabled_opts) + + if self.args.backend_no_opts: + llvmir2hll_params.append('-no-opts') + + if self.args.backend_aggressive_opts: + llvmir2hll_params.append('-aggressive-opts') + + if self.args.backend_no_var_renaming: + llvmir2hll_params.append('-no-var-renaming') + + if self.args.backend_no_symbolic_names: + llvmir2hll_params.append('-no-symbolic-names') + + if self.args.backend_keep_all_brackets: + llvmir2hll_params.append('-keep-all-brackets') + + if self.args.backend_keep_library_funcs: + llvmir2hll_params.append('-keep-library-funcs') + + if self.args.backend_no_time_varying_info: + llvmir2hll_params.append('-no-time-varying-info') + + if self.args.backend_no_compound_operators: + llvmir2hll_params.append('-no-compound-operators') + + if self.args.backend_find_patterns: + llvmir2hll_params.extend(['-find-patterns', self.args.backend_find_patterns]) + + if self.args.backend_emit_cg: + llvmir2hll_params.append('-emit-cg') + + if self.args.backend_force_module_name: + llvmir2hll_params.append('-force-module-name=' + self.args.backend_force_module_name) + + if self.args.backend_strict_fpu_semantics: + llvmir2hll_params.append('-strict-fpu-semantics') + + if self.args.backend_emit_cfg: + llvmir2hll_params.append('-emit-cfgs') + + if self.args.backend_cfg_test: + llvmir2hll_params.append('--backend-cfg-test') + + if self.args.max_memory: + llvmir2hll_params.extend(['-max-memory', self.args.max_memory]) + elif not self.args.no_memory_limit: + # By default, we want to limit the memory of llvmir2hll into half of system + # RAM to prevent potential black screens on Windows (#270). + llvmir2hll_params.append('-max-memory-half-ram') + + # Decompile the optimized IR code. 
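The logging branch below (like the bin2llvmir branch above) still emulates Bash's `& timed_kill $!` with placeholder PIDs and threads. The standard library can run the tool, capture its combined output, and enforce the timeout directly. A hedged sketch, assuming the 300-second default used elsewhere in these scripts:

# Editorial sketch: run a tool with a hard timeout and capture stdout+stderr,
# replacing the background-job/timed_kill emulation.
import subprocess

def run_with_timeout(cmd, timeout=300):
    try:
        proc = subprocess.run(cmd, stdout=subprocess.PIPE,
                              stderr=subprocess.STDOUT, timeout=timeout)
        return proc.returncode, proc.stdout.decode('utf-8', errors='replace')
    except subprocess.TimeoutExpired as e:
        partial = e.output.decode('utf-8', errors='replace') if e.output else ''
        return None, partial  # None signals a timeout to the caller

# e.g. rc, out = run_with_timeout([config.LLVMIR2HLL] + llvmir2hll_params)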
+ print() + print('##### Decompiling ' + out_backend_bc + ' into ' + self.output + '...') + print('RUN: ' + config.LLVMIR2HLL + ' ' + ' '.join(llvmir2hll_params)) + + if self.args.generate_log: + PID = 0 + + def thread3(): + subprocess.call([TIME, config.LLVMIR2HLL] + llvmir2hll_params, shell=True, stdout=open( + tool_log_file, 'wb'), stderr=subprocess.STDOUT) + + threading.Thread(target=thread3).start() + + PID = 0 # TODO Expand.exclamation() + + def thread4(): + self.timed_kill(PID) + + threading.Thread(target=self.thread4).start() + + os.kill(PID, 0) + # subprocess.call(['wait', PID], shell=True, stderr=subprocess.STDOUT, stdout=subprocess.DEVNULL) + + llvmir2hll_rc = 0 # use rc _rc4 + LLVMIR2HLL_AND_TIME_OUTPUT = os.popen('cat \'' + tool_log_file + '\'').read().rstrip('\n') + LOG_LLVMIR2HLL_RC = self.get_tool_rc(llvmir2hll_rc, LLVMIR2HLL_AND_TIME_OUTPUT) + LOG_LLVMIR2HLL_RUNTIME = self.get_tool_runtime(LLVMIR2HLL_AND_TIME_OUTPUT) + LOG_LLVMIR2HLL_MEMORY = self.get_tool_memory_usage(LLVMIR2HLL_AND_TIME_OUTPUT) + LOG_LLVMIR2HLL_OUTPUT = self.get_tool_output(LLVMIR2HLL_AND_TIME_OUTPUT) + + print(LOG_LLVMIR2HLL_OUTPUT) + # Wait a bit to ensure that all the memory that has been assigned to the tool was released. + time.sleep(0.1) + else: + llvmir2hll_rc = subprocess.call([config.LLVMIR2HLL, ' '.join(llvmir2hll_params)], shell=True) + + if llvmir2hll_rc != 0: + if self.args.generate_log: + self.generate_log() + + self.cleanup() + Utils.print_error('Decompilation of file %s failed' % out_backend_bc) + + self.check_whether_decompilation_should_be_forcefully_stopped('llvmir2hll') + + # Convert .dot graphs to desired format. + if ((self.args.backend_emit_cg and self.args.backend_cg_conversion == 'auto') or ( + self.args.backend_emit_cfg and self.args.backend_cfg_conversion == 'auto')): + print() + print('##### Converting .dot files to the desired format...') + + if self.args.backend_emit_cg and self.args.backend_cg_conversion == 'auto': + print( + 'RUN: dot -T' + self.args.graph_format + ' ' + self.output + '.cg.dot > ' + self.output + '.cg.' + self.args.graph_format) + + subprocess.call(['dot', '-T' + self.args.graph_format, self.output + '.cg.dot'], shell=True, + stdout=open(self.output + '.cg.' + self.args.graph_format, 'wb')) + + if self.args.backend_emit_cfg and self.args.backend_cfg_conversion == 'auto': + for cfg in glob.glob(self.output + '.cfg.*.dot'): + print('RUN: dot -T' + self.args.graph_format + ' ' + cfg + ' > ' + ( + os.path.splitext(cfg)[0] + '.' + self.args.graph_format)) + + subprocess.call(['dot', '-T' + self.args.graph_format, cfg], shell=True, + stdout=open((os.path.splitext(cfg)[0]) + '.' + self.args.graph_format, 'wb')) + + # Remove trailing whitespace and the last redundant empty new line from the + # generated output (if any). It is difficult to do this in the back-end, so we + # do it here. + # Note: Do not use the -i flag (in-place replace) as there is apparently no way + # of getting sed -i to work consistently on both MacOS and Linux. + # TODO + _rc4 = subprocess.call( + 'sed' + ' ' + '-e' + ' ' + ':a' + ' ' + '-e' + ' ' + '/^\\n*$/{$d;N;};/\\n$/ba' + ' ' + '-e' + ' ' + 's/[[:space:]]*$//', + shell=True, stdin=open(self.output, 'rb'), stdout=open(self.output + '.tmp', 'wb')) + + shutil.move(self.output + '.tmp', self.output) + + # Colorize output file. + if self.args.color_for_ida: + subprocess.call([config.IDA_COLORIZER, self.output, self.config], shell=True) + + # Store the information about the decompilation into the JSON file. 
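The JSON file mentioned in the comment above is assembled by hand in generate_log() with single-quoted keys, and json_escape() relies on re.escape(), which escapes regex metacharacters rather than JSON. A sketch of writing the same record with the json module (illustrative; key names taken from generate_log's format string):

# Editorial sketch: emit the decompilation log as valid JSON; json.dump
# handles quoting and escaping, so no hand-rolled json_escape() is needed.
import json

def write_decompilation_log(log_path, **fields):
    # fields: input_file, pdb_file, start_date, end_date, mode, arch, format,
    # plus the *_rc / *_output / *_runtime / *_memory entries.
    with open(log_path, 'w') as f:
        json.dump(fields, f, indent=4)

# e.g. write_decompilation_log(output + '.decompilation.log',
#                              input_file=input_path, mode='bin')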
+ if self.args.generate_log: + self.generate_log() + + # Success! + self.cleanup() + print() + print('##### Done!') + + """ + while True: + + if (sys.argv[1]) == '-a' or (sys.argv[1]) == '--arch': + # Target architecture. + if (self.args.arch) != '': + subprocess.call(['print_error', 'Duplicate option: -a|--arch'], shell=True) + if (sys.argv[ + 2]) != 'mips' os.path.exists((sys.argv[2])) '!=' '-a' (sys.argv[2]) != 'arm' os.path.exists((sys.argv[2])) '!=' '-a' (sys.argv[2]) != 'powerpc' os.path.exists((sys.argv[2]))'!=' != '': + subprocess.call(['print_error', + 'Unsupported target architecture '' + (sys.argv[2]) + ''. Supported architectures: Intel x86, ARM, ARM + Thumb, MIPS, PIC32, PowerPC.'], + shell=True) + self.args.arch = sys.argv[2] + subprocess.call(['shift', '2'], shell=True) + elif (sys.argv[1]) == '-e' or (sys.argv[1]) == '--endian': + # Endian. + if ENDIAN != '': + utils.print_error('Duplicate option: -e|--endian') + ENDIAN = sys.argv[2] + subprocess.call(['shift', '2'], shell=True) + elif (sys.argv[1]) == '-h' or (sys.argv[1]) == '--help': + # Help. + print_help() + exit(0) + elif (sys.argv[1]) == '-k' or (sys.argv[1]) == '--keep-unreachable-funcs': + # Keep unreachable functions. + # Do not check if this parameter is a duplicate because when both + # --select-ranges or --select--functions and -k is specified, the + # decompilation fails. + self.args.keep_unreachable_funcs = 1 + subprocess.call(['shift'], shell=True) + elif (sys.argv[1]) == '-l' or (sys.argv[1]) == '--target-language': + # Target language. + if (HLL) != '': + utils.print_error('Duplicate option: -l|--target-language') + if (sys.argv[2]) != 'c' and os.path.exists((sys.argv[2])) != '': + utils.print_error('Unsupported target language '' + (sys.argv[2]) + ''. Supported languages: C, Python.') + HLL = sys.argv[2] + subprocess.call(['shift', '2'], shell=True) + elif (sys.argv[1]) == '-m' or (sys.argv[1]) == '--mode': + # Decompilation mode. + if (MODE) != '': + utils.print_error('Duplicate option: -m|--mode') + if (sys.argv[2]) != 'bin' os.path.exists((sys.argv[2])) '!=' '-a' (sys.argv[2]) != 'raw': + utils.print_error('Unsupported decompilation mode '' + (sys.argv[2]) + ''. Supported modes: bin, ll, raw.') + MODE = sys.argv[2] + subprocess.call(['shift', '2'], shell=True) + elif (sys.argv[1]) == '-o' or (sys.argv[1]) == '--output': + # Output file. + if (OUT) != '': + subprocess.call(['print_error', 'Duplicate option: -o|--output'], shell=True) + OUT = sys.argv[2] + subprocess.call(['shift', '2'], shell=True) + elif (sys.argv[1]) == '-p' or (sys.argv[1]) == '--pdb': + # File containing PDB debug information. + if (self.args.pdb) != '': + subprocess.call(['print_error', 'Duplicate option: -p|--pdb'], shell=True) + self.args.pdb = sys.argv[2] + if not os.access, R_OK) ): + subprocess.call( + ['print_error', 'The input PDB file '' + (self.args.pdb) + '' does not exist or is not readable'], + shell=True) + subprocess.call(['shift', '2'], shell=True) + elif (sys.argv[1]) == '--backend-aggressive-opts': + # Enable aggressive optimizations. + if (self.args.backend_aggressive_opts) != '': + subprocess.call(['print_error', 'Duplicate option: --backend-aggressive-opts'], shell=True) + self.args.backend_aggressive_opts = 1 + subprocess.call(['shift'], shell=True) + elif (sys.argv[1]) == '--backend-arithm-expr-evaluator': + # Name of the evaluator of arithmetical expressions. 
+ if (self.args.backend_arithm_expr_evaluator) != '': + subprocess.call(['print_error', 'Duplicate option: --backend-arithm-expr-evaluator'], shell=True) + self.args.backend_arithm_expr_evaluator = sys.argv[2] + subprocess.call(['shift', '2'], shell=True) + elif (sys.argv[1]) == '--backend-call-info-obtainer': + # Name of the obtainer of information about function calls. + if (self.args.backend_call_info_obtainer) != '': + subprocess.call(['print_error', 'Duplicate option: --backend-call-info-obtainer'], shell=True) + self.args.backend_call_info_obtainer = sys.argv[2] + subprocess.call(['shift', '2'], shell=True) + elif (sys.argv[1]) == '--backend-cfg-test': + # Unify the labels in the emitted CFG. + if (self.args.backend_cfg_test) != '': + subprocess.call(['print_error', 'Duplicate option: --backend-cfg-test'], shell=True) + self.args.backend_cfg_test = 1 + subprocess.call(['shift'], shell=True) + elif (sys.argv[1]) == '--backend-disabled-opts': + # List of disabled optimizations in the backend. + if (self.args.backend_disabled_opts) != '': + subprocess.call(['print_error', 'Duplicate option: --backend-disabled-opts'], shell=True) + self.args.backend_disabled_opts = sys.argv[2] + subprocess.call(['shift', '2'], shell=True) + elif (sys.argv[1]) == '--backend-emit-cfg': + # Emit a CFG of each function in the backend IR. + if (self.args.backend_emit_cfg) != '': + subprocess.call(['print_error', 'Duplicate option: --backend-emit-cfg'], shell=True) + self.args.backend_emit_cfg = 1 + subprocess.call(['shift'], shell=True) + elif (sys.argv[1]) == '--backend-emit-cg': + # Emit a CG of the decompiled module in the backend IR. + if (self.args.backend_emit_cg) != '': + subprocess.call(['print_error', 'Duplicate option: --backend-emit-cg'], shell=True) + self.args.backend_emit_cg = 1 + subprocess.call(['shift'], shell=True) + elif (sys.argv[1]) == '--backend-cg-conversion': + # Should the CG from the backend be converted automatically into the desired format?. + if (self.args.backend_cg_conversion) != '': + subprocess.call(['print_error', 'Duplicate option: --backend-cg-conversion'], shell=True) + if (sys.argv[2]) != 'auto' os.path.exists((sys.argv[2]))'!=' != '': + subprocess.call(['print_error', + 'Unsupported CG conversion mode '' + (sys.argv[2]) + ''. Supported modes: auto, manual.'], + shell=True) + self.args.backend_cg_conversion = sys.argv[2] + subprocess.call(['shift', '2'], shell=True) + elif (sys.argv[1]) == '--backend-cfg-conversion': + # Should CFGs from the backend be converted automatically into the desired format?. + if (self.args.backend_cfg_conversion) != '': + subprocess.call(['print_error', 'Duplicate option: --backend-cfg-conversion'], shell=True) + if (sys.argv[2]) != 'auto' os.path.exists((sys.argv[2]))'!=' != '': + subprocess.call(['print_error', + 'Unsupported CFG conversion mode '' + (sys.argv[2]) + ''. Supported modes: auto, manual.'], + shell=True) + self.args.backend_cfg_conversion = sys.argv[2] + subprocess.call(['shift', '2'], shell=True) + elif (sys.argv[1]) == '--backend-enabled-opts': + # List of enabled optimizations in the backend. + if (self.args.backend_enabled_opts) != '': + subprocess.call(['print_error', 'Duplicate option: --backend-enabled-opts'], shell=True) + self.args.backend_enabled_opts = sys.argv[2] + subprocess.call(['shift', '2'], shell=True) + elif ((sys.argv[1]) == '--backend-find-patterns'): + # Try to find patterns. 
+ if (self.args.backend_find_patterns) != '': + subprocess.call(['print_error', 'Duplicate option: --backend-find-patterns'], shell=True) + self.args.backend_find_patterns = sys.argv[2] + subprocess.call(['shift', '2'], shell=True) + elif ((sys.argv[1]) == '--backend-force-module-name'): + # Force the module's name in the backend. + if (self.args.backend_force_module_name) != '': + subprocess.call(['print_error', 'Duplicate option: --backend-force-module-name'], shell=True) + self.args.backend_force_module_name = sys.argv[2] + subprocess.call(['shift', '2'], shell=True) + elif ((sys.argv[1]) == '--backend-keep-all-brackets'): + # Keep all brackets. + if (self.args.backend_keep_all_brackets) != '': + subprocess.call(['print_error', 'Duplicate option: --backend-keep-all-brackets'], shell=True) + self.args.backend_keep_all_brackets = 1 + subprocess.call(['shift'], shell=True) + elif ((sys.argv[1]) == '--backend-keep-library-funcs'): + # Keep library functions. + if (self.args.backend_keep_library_funcs) != '': + subprocess.call(['print_error', 'Duplicate option: --backend-keep-library-funcs'], shell=True) + self.args.backend_keep_library_funcs = 1 + subprocess.call(['shift'], shell=True) + elif ((sys.argv[1]) == '--backend-llvmir2bir-converter'): + # Name of the converter of LLVM IR to BIR. + if (self.args.backend_llvmir2bir_converter) != '': + subprocess.call(['print_error', 'Duplicate option: --backend-llvmir2bir-converter'], shell=True) + self.args.backend_llvmir2bir_converter = sys.argv[2] + subprocess.call(['shift', '2'], shell=True) + elif ((sys.argv[1]) == '--backend-no-compound-operators'): + # Do not use compound operators. + if (self.args.backend_no_compound_operators) != '': + subprocess.call(['print_error', 'Duplicate option: --backend-no-compound-operators'], shell=True) + self.args.backend_no_compound_operators = 1 + subprocess.call(['shift'], shell=True) + elif ((sys.argv[1]) == '--backend-no-debug'): + # Emission of debug messages. + if (self.args.backend_no_debug) != '': + subprocess.call(['print_error', 'Duplicate option: --backend-no-debug'], shell=True) + self.args.backend_no_debug = 1 + subprocess.call(['shift'], shell=True) + elif ((sys.argv[1]) == '--backend-no-debug-comments'): + # Emission of debug comments. + if (self.args.backend_no_debug_comments) != '': + subprocess.call(['print_error', 'Duplicate option: --backend-no-debug-comments'], shell=True) + self.args.backend_no_debug_comments = 1 + subprocess.call(['shift'], shell=True) + elif ((sys.argv[1]) == '--backend-no-opts'): + # Disable backend optimizations. + if (self.args.backend_no_opts) != '': + subprocess.call(['print_error', 'Duplicate option: --backend-no-opts'], shell=True) + self.args.backend_no_opts = 1 + subprocess.call(['shift'], shell=True) + elif ((sys.argv[1]) == '--backend-no-symbolic-names'): + # Disable the conversion of constant arguments. + if (self.args.backend_no_symbolic_names) != '': + subprocess.call(['print_error', 'Duplicate option: --backend-no-symbolic-names'], shell=True) + self.args.backend_no_symbolic_names = 1 + subprocess.call(['shift'], shell=True) + elif ((sys.argv[1]) == '--backend-no-time-varying-info'): + # Do not emit any time-varying information. 
+ if (self.args.backend_no_time_varying_info) != '': + subprocess.call(['print_error', 'Duplicate option: --backend-no-time-varying-info'], shell=True) + self.args.backend_no_time_varying_info = 1 + subprocess.call(['shift'], shell=True) + elif ((sys.argv[1]) == '--backend-no-var-renaming'): + # Disable renaming of variables in the backend. + if (self.args.backend_no_var_renaming) != '': + subprocess.call(['print_error', 'Duplicate option: --backend-no-var-renaming'], shell=True) + self.args.backend_no_var_renaming = 1 + subprocess.call(['shift'], shell=True) + elif ((sys.argv[1]) == '--backend-semantics'): + # The used semantics in the backend. + if (self.args.backend_semantics) != '': + subprocess.call(['print_error', 'Duplicate option: --backend-semantics'], shell=True) + self.args.backend_semantics = sys.argv[2] + subprocess.call(['shift', '2'], shell=True) + elif ((sys.argv[1]) == '--backend-strict-fpu-semantics'): + # Use strict FPU semantics in the backend. + if (self.args.backend_strict_fpu_semantics) != '': + subprocess.call(['print_error', 'Duplicate option: --backend-strict-fpu-semantics'], shell=True) + self.args.backend_strict_fpu_semantics = 1 + subprocess.call(['shift'], shell=True) + elif ((sys.argv[1]) == '--backend-var-renamer'): + # Used renamer of variable names. + if (self.args.backend_var_renamer) != '': + subprocess.call(['print_error', 'Duplicate option: --backend-var-renamer'], shell=True) + if (sys.argv[ + 2]) != 'address' os.path.exists((sys.argv[2])) '!=' '-a' (sys.argv[2]) != 'readable' os.path.exists((sys.argv[2])) '!=' '-a' (sys.argv[2]) != 'unified': + subprocess.call(['print_error', + 'Unsupported variable renamer '' + (sys.argv[2]) + ''. Supported renamers: address, hungarian, readable, simple, unified.'], + shell=True) + self.args.backend_var_renamer = sys.argv[2] + subprocess.call(['shift', '2'], shell=True) + elif ((sys.argv[1]) == '--raw-entry-point'): + # Entry point address for binary created from raw data. + if (RAW_ENTRY_POINT) != '': + subprocess.call(['print_error', 'Duplicate option: --raw-entry-point'], shell=True) + RAW_ENTRY_POINT = sys.argv[2] + # RAW_ENTRY_POINT='$(($2))' # evaluate hex address - probably not needed + subprocess.call(['shift', '2'], shell=True) + elif ((sys.argv[1]) == '--raw-section-vma'): + # Virtual memory address for section created from raw data. + if (RAW_SECTION_VMA) != '': + subprocess.call(['print_error', 'Duplicate option: --raw-section-vma'], shell=True) + RAW_SECTION_VMA = sys.argv[2] + # RAW_SECTION_VMA='$(($2))' # evaluate hex address - probably not needed + subprocess.call(['shift', '2'], shell=True) + elif ((sys.argv[1]) == '--self.cleanup'): + # Cleanup. 
+ if (CLEANUP) != '': + subprocess.call(['print_error', 'Duplicate option: --self.cleanup'], shell=True) + CLEANUP = 1 + subprocess.call(['shift'], shell=True) + elif ((sys.argv[1]) == '--color-for-ida'): + if (self.args.color_for_ida) != '': + subprocess.call(['print_error', 'Duplicate option: --color-for-ida'], shell=True) + self.args.color_for_ida = 1 + subprocess.call(['shift'], shell=True) + elif ((sys.argv[1]) == '--config'): + if (CONFIG_DB) != '': + subprocess.call(['print_error', 'Duplicate option: --config'], shell=True) + if (NO_CONFIG) != '': + subprocess.call(['print_error', 'Option --config can not be used with option --no-config'], + shell=True) + CONFIG_DB = sys.argv[2] + if (not os.access((CONFIG_DB), R_OK) ): + subprocess.call(['print_error', + 'The input JSON configuration file '' + (CONFIG_DB) + '' does not exist or is not readable'], + shell=True) + subprocess.call(['shift', '2'], shell=True) + elif ((sys.argv[1]) == '--no-config'): + if (NO_CONFIG) != '': + subprocess.call(['print_error', 'Duplicate option: --no-config'], shell=True) + if (CONFIG_DB) != '': + subprocess.call(['print_error', 'Option --no-config can not be used with option --config'], + shell=True) + NO_CONFIG = 1 + subprocess.call(['shift'], shell=True) + elif ((sys.argv[1]) == '--graph-format'): + # Format of graph files. + if (self.args.graph_format) != '': + subprocess.call(['print_error', 'Duplicate option: --graph-format'], shell=True) + if (sys.argv[2]) != 'pdf' os.path.exists((sys.argv[2])) '!=' '-a' (sys.argv[2]) != 'svg': + subprocess.call(['print_error', + 'Unsupported graph format '' + (sys.argv[2]) + ''. Supported formats: pdf, png, svg.'], + shell=True) + self.args.graph_format = sys.argv[2] + subprocess.call(['shift', '2'], shell=True) + elif ((sys.argv[1]) == '--select-decode-only'): + if (self.args.selected_decode_only) != '': + subprocess.call(['print_error', 'Duplicate option: --select-decode-only'], shell=True) + self.args.selected_decode_only = 1 + subprocess.call(['shift'], shell=True) + elif ((sys.argv[1]) == '--select-functions'): + # List of selected functions. + if (self.args.selected_functions) != '': + subprocess.call(['print_error', 'Duplicate option: --select-functions'], shell=True) + IFS').setValue(',') + # parser line into array + self.args.keep_unreachable_funcs = 1 + subprocess.call(['shift', '2'], shell=True) + elif ((sys.argv[1]) == '--select-ranges'): + # List of selected ranges. + if (self.args.selected_ranges) != '': + subprocess.call(['print_error', 'Duplicate option: --select-ranges'], shell=True) + self.args.selected_ranges = sys.argv[2] + IFS').setValue(',') + # parser line into array + self.args.keep_unreachable_funcs = 1 + subprocess.call(['shift', '2'], shell=True) + elif ((sys.argv[1]) == '--stop-after'): + # Stop decompilation after the given tool. + if (STOP_AFTER) != '': + subprocess.call(['print_error', 'Duplicate option: --stop-after'], shell=True) + STOP_AFTER = sys.argv[2] + if (not re.search('^(fileinfo|unpacker|bin2llvmir|llvmir2hll)' + '$', (STOP_AFTER))): + subprocess.call(['print_error', 'Unsupported tool '' + (STOP_AFTER) + '' for --stop-after'], + shell=True) + subprocess.call(['shift', '2'], shell=True) + elif ((sys.argv[1]) == '--static-code-sigfile'): + # User provided signature file. 
+ if not os.path.isfile((sys.argv[2])): + subprocess.call(['print_error', 'Invalid .yara file '' + (sys.argv[2]) + '''], shell=True) + self.args.static_code_sigfile').setValue('(' + (sys.argv[2]) + ')') + subprocess.call(['shift', '2'], shell=True) + elif ((sys.argv[1]) == '--static-code-archive'): + # User provided archive to create signature file from. + if not os.path.isfile((sys.argv[2])): + subprocess.call(['print_error', 'Invalid archive file '' + (sys.argv[2]) + '''], shell=True) + self.args.static_code_archive').setValue('(' + (sys.argv[2]) + ')') + subprocess.call(['shift', '2'], shell=True) + elif ((sys.argv[1]) == '--no-default-static-signatures'): + self.args.no_default_static_signatures = 1 + subprocess.call(['shift'], shell=True) + elif ((sys.argv[1]) == '--fileinfo-verbose'): + # Enable --verbose mode in fileinfo. + if (self.args.fileinfo_verbose) != '': + subprocess.call(['print_error', 'Duplicate option: --fileinfo-verbose'], shell=True) + self.args.fileinfo_verbose = 1 + subprocess.call(['shift'], shell=True) + elif ((sys.argv[1]) == '--fileinfo-use-all-external-patterns'): + if (FILEINFO_USE_ALL_EXTERNAL_PATTERNS) != '': + subprocess.call(['print_error', 'Duplicate option: --fileinfo-use-all-external-patterns'], + shell=True) + FILEINFO_USE_ALL_EXTERNAL_PATTERNS = 1 + subprocess.call(['shift'], shell=True) + elif ((sys.argv[1]) == '--ar-name'): + # Archive decompilation by name. + if (self.args.ar_name) != '': + subprocess.call(['print_error', 'Duplicate option: --ar-name'], shell=True) + self.args.ar_name = sys.argv[2] + subprocess.call(['shift', '2'], shell=True) + elif ((sys.argv[1]) == '--ar-index'): + # Archive decompilation by index. + if (self.args.ar_index) != '': + subprocess.call(['print_error', 'Duplicate option: --ar-index'], shell=True) + self.args.ar_index = sys.argv[2] + subprocess.call(['shift', '2'], shell=True) + elif ((sys.argv[1]) == '--max-memory'): + if (self.args.max_memory) != '': + subprocess.call(['print_error', 'Duplicate option: --max-memory'], shell=True) + if (self.args.no_memory_limit) != '': + subprocess.call(['print_error', 'Clashing options: --max-memory and --no-memory-limit'], shell=True) + self.args.max_memory = sys.argv[2] + if (not re.search(Str(Glob('^[0-9] + ' + '$')), (self.args.max_memory))): + subprocess.call(['print_error', + 'Invalid value for --max-memory: ' + (self.args.max_memory) + ' (expected a positive integer)'], + shell=True) + subprocess.call(['shift', '2'], shell=True) + elif ((sys.argv[1]) == '--no-memory-limit'): + if (self.args.no_memory_limit) != '': + subprocess.call(['print_error', 'Duplicate option: --no-memory-limit'], shell=True) + if (self.args.max_memory) != '': + subprocess.call(['print_error', 'Clashing options: --max-memory and --no-memory-limit'], shell=True) + self.args.no_memory_limit = 1 + subprocess.call(['shift'], shell=True) + elif ((sys.argv[1]) == '--generate-log'): + # Intentionally undocumented option. + # Used only for internal testing. + # NOT guaranteed it works everywhere (systems other than our internal test machines). + if (self.args.generate_log) != '': + subprocess.call(['print_error', 'Duplicate option: --generate-log'], shell=True) + self.args.generate_log = 1 + self.args.no_memory_limit = 1 + subprocess.call(['shift'], shell=True) + elif ((sys.argv[1]) == '--'): + # Input file. 
+ if (Expand.hash() == 2): + IN = sys.argv[2] + if (not os.access((IN), R_OK) ): + subprocess.call( + ['print_error', 'The input file '' + (IN) + '' does not exist or is not readable'], + shell=True) + elif (Expand.hash() > 2): + # Invalid options. + subprocess.call( + ['print_error', 'Invalid options: '' + (sys.argv[2]) + '', '' + (sys.argv[3]) + '' ...'], + shell=True) + break + """ + + +if __name__ == '__main__': + args = parse_args() + + decompiler = Decompiler(args) + decompiler.decompile() From 2f51c6920986368dcd472dbe4d91a08f15803a6b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20K=C3=B6sel?= Date: Fri, 15 Jun 2018 20:14:42 +0200 Subject: [PATCH 09/48] Decompiler script now runs successfully until unpacking --- scripts/retdec_archive_decompiler.py | 2 +- scripts/retdec_config.py | 80 ++-- scripts/retdec_decompiler.py | 433 ++++++++---------- .../retdec_signature_from_library_creator.py | 4 +- scripts/retdec_unpacker.py | 22 +- scripts/retdec_utils.py | 35 +- 6 files changed, 270 insertions(+), 306 deletions(-) mode change 100644 => 100755 scripts/retdec_decompiler.py diff --git a/scripts/retdec_archive_decompiler.py b/scripts/retdec_archive_decompiler.py index 4e6f940cf..12bfeb1de 100644 --- a/scripts/retdec_archive_decompiler.py +++ b/scripts/retdec_archive_decompiler.py @@ -74,7 +74,7 @@ def _cleanup(self): """Cleans up all temporary files. No arguments accepted. """ - Utils.remove_forced(self.tmp_archive) + Utils.remove_dir_forced(self.tmp_archive) def _check_arguments(self): diff --git a/scripts/retdec_config.py b/scripts/retdec_config.py index 6861bf867..4304b766c 100644 --- a/scripts/retdec_config.py +++ b/scripts/retdec_config.py @@ -10,16 +10,16 @@ """ INSTALL_BIN_DIR = SCRIPT_DIR UNIT_TESTS_DIR = INSTALL_BIN_DIR -INSTALL_SHARE_DIR = INSTALL_BIN_DIR + '/../share/retdec' -INSTALL_SUPPORT_DIR = INSTALL_SHARE_DIR + '/support' -INSTALL_SHARE_YARA_DIR = INSTALL_SUPPORT_DIR + '/generic/yara_patterns' +INSTALL_SHARE_DIR = os.path.join(INSTALL_BIN_DIR, '..', 'share', 'retdec') +INSTALL_SUPPORT_DIR = os.path.join(INSTALL_SHARE_DIR, 'support') +INSTALL_SHARE_YARA_DIR = os.path.join(INSTALL_SUPPORT_DIR, 'generic', 'yara_patterns') # generic configuration -GENERIC_TYPES_DIR = INSTALL_SUPPORT_DIR + '/generic/types' -GENERIC_SIGNATURES_DIR = INSTALL_SHARE_YARA_DIR + '/static-code' +GENERIC_TYPES_DIR = os.path.join(INSTALL_SUPPORT_DIR, 'generic', 'types') +GENERIC_SIGNATURES_DIR = os.path.join(INSTALL_SHARE_YARA_DIR, 'static-code') # ARM-specific configuration -ARM_ORDS_DIR = INSTALL_SUPPORT_DIR + '/arm/ords' +ARM_ORDS_DIR = os.path.join(INSTALL_SUPPORT_DIR, 'arm', 'ords') # X86-specific configuration -X86_ORDS_DIR = INSTALL_SUPPORT_DIR + '/x86/ords' +X86_ORDS_DIR = os.path.join(INSTALL_SUPPORT_DIR, 'x86', 'ords') """BIN2LLVMIR parameters The following list of passes is -O3 @@ -40,39 +40,39 @@ - Optimization -phi2seq is needed to be run at the end and not to run two times. This is the reason why it is placed at the very end. 
""" -BIN2LLVMIR_PARAMS_DISABLES = '-disable-inlining -disable-simplify-libcalls' -BIN2LLVMIR_LLVM_PASSES_ONLY = '-instcombine -tbaa -targetlibinfo -basicaa -domtree -simplifycfg -domtree -early-cse' \ - ' -lower-expect -targetlibinfo -tbaa -basicaa -globalopt -mem2reg -instcombine' \ - ' -simplifycfg -basiccg -domtree -early-cse -lazy-value-info -jump-threading' \ - ' -correlated-propagation -simplifycfg -instcombine -simplifycfg -reassociate -domtree' \ - ' -loops -loop-simplify -lcssa -loop-rotate -licm -lcssa -instcombine -scalar-evolution' \ - ' -loop-simplifycfg -loop-simplify -aa -loop-accesses -loop-load-elim -lcssa -indvars' \ - ' -loop-idiom -loop-deletion -memdep -gvn -memdep -sccp -instcombine -lazy-value-info' \ - ' -jump-threading -correlated-propagation -domtree -memdep -dse -dce -bdce -adce -die' \ - ' -simplifycfg -instcombine -strip-dead-prototypes -globaldce -constmerge -constprop' \ - ' -instnamer -domtree -instcombine' +BIN2LLVMIR_PARAMS_DISABLES = ['-disable-inlining', '-disable-simplify-libcalls'] +BIN2LLVMIR_LLVM_PASSES_ONLY = ['-instcombine', '-tbaa', '-targetlibinfo', '-basicaa', '-domtree', '-simplifycfg', '-domtree', '-early-cse', + '-lower-expect', '-targetlibinfo', '-tbaa', '-basicaa', '-globalopt', '-mem2reg', '-instcombine', + '-simplifycfg', '-basiccg', '-domtree', '-early-cse', '-lazy-value-info', '-jump-threading', + '-correlated-propagation', '-simplifycfg', '-instcombine', '-simplifycfg', '-reassociate', '-domtree', + '-loops -loop-simplify', '-lcssa', '-loop-rotate', '-licm', '-lcssa', '-instcombine', '-scalar-evolution', + '-loop-simplifycfg', '-loop-simplify', '-aa', '-loop-accesses', '-loop-load-elim', '-lcssa', '-indvars', + '-loop-idiom', '-loop-deletion', '-memdep', '-gvn', '-memdep', '-sccp', '-instcombine', '-lazy-value-info', + '-jump-threading', '-correlated-propagation', '-domtree', '-memdep', '-dse', '-dce', '-bdce', '-adce', '-die', + '-simplifycfg -instcombine -strip-dead-prototypes -globaldce -constmerge -constprop' + '-instnamer', '-domtree', '-instcombine'] -BIN2LLVMIR_PARAMS = '-provider-init -decoder -verify -main-detection -idioms-libgcc -inst-opt -register' \ - ' -cond-branch-opt -syscalls -stack -constants -param-return -local-vars -inst-opt -simple-types' \ - ' -generate-dsm -remove-asm-instrs -class-hierarchy -select-fncs -unreachable-funcs -inst-opt' \ - ' -value-protect $BIN2LLVMIR_LLVM_PASSES_ONLY $BIN2LLVMIR_LLVM_PASSES_ONLY -simple-types' \ - ' -stack-ptr-op-remove -inst-opt -idioms -global-to-local -dead-global-assign -instcombine' \ - ' -phi2seq -value-protect $BIN2LLVMIR_PARAMS_DISABLES' +BIN2LLVMIR_PARAMS = ['-provider-init', '-decoder', '-verify', '-main-detection', '-idioms-libgcc', '-inst-opt', '-register', + '-cond-branch-opt', '-syscalls', '-stack', '-constants', '-param-return', '-local-vars', '-inst-opt', '-simple-types', + '-generate-dsm', '-remove-asm-instrs', '-class-hierarchy', '-select-fncs', '-unreachable-funcs', '-inst-opt', + '-value-protect'] + BIN2LLVMIR_LLVM_PASSES_ONLY + BIN2LLVMIR_LLVM_PASSES_ONLY + ['-simple-types', + '-stack-ptr-op-remove', '-inst-opt -idioms', '-global-to-local', '-dead-global-assign', '-instcombine', + '-phi2seq', '-value-protect'] + BIN2LLVMIR_PARAMS_DISABLES # Paths to tools. 
-FILEINFO = INSTALL_BIN_DIR + '/retdec-fileinfo' -FILEINFO_EXTERNAL_YARA_PRIMARY_CRYPTO_DATABASES = [INSTALL_SHARE_YARA_DIR + '/signsrch/signsrch.yara'] -FILEINFO_EXTERNAL_YARA_EXTRA_CRYPTO_DATABASES = [INSTALL_SHARE_YARA_DIR + '/signsrch/signsrch_regex.yara'] -AR = INSTALL_BIN_DIR + '/retdec-ar-extractor' -BIN2PAT = INSTALL_BIN_DIR + '/retdec-bin2pat' -PAT2YARA = INSTALL_BIN_DIR + '/retdec-pat2yara' -CONFIGTOOL = INSTALL_BIN_DIR + '/retdec-config' -EXTRACT = INSTALL_BIN_DIR + '/retdec-macho-extractor' -DECOMPILER_SH = INSTALL_BIN_DIR + '/retdec-decompiler.sh' -ARCHIVE_DECOMPILER_PY = INSTALL_BIN_DIR + '/retdec_archive_decompiler.py' -SIG_FROM_LIB_PY = INSTALL_BIN_DIR + '/retdec_signature_from_library_creator.py' -UNPACK_PY = INSTALL_BIN_DIR + '/retdec_unpacker.py' -LLVMIR2HLL = INSTALL_BIN_DIR + '/retdec-llvmir2hll' -BIN2LLVMIR = INSTALL_BIN_DIR + '/retdec-bin2llvmir' -IDA_COLORIZER = INSTALL_BIN_DIR + '/retdec-color-c.py' -UNPACKER = INSTALL_BIN_DIR + '/retdec-unpacker' +FILEINFO = os.path.join(INSTALL_BIN_DIR, 'retdec-fileinfo') +FILEINFO_EXTERNAL_YARA_PRIMARY_CRYPTO_DATABASES = [os.path.join(INSTALL_SHARE_YARA_DIR, 'signsrch', 'signsrch.yara')] +FILEINFO_EXTERNAL_YARA_EXTRA_CRYPTO_DATABASES = [os.path.join(INSTALL_SHARE_YARA_DIR, 'signsrch', 'signsrch_regex.yara')] +AR = os.path.join(INSTALL_BIN_DIR, 'retdec-ar-extractor') +BIN2PAT = os.path.join(INSTALL_BIN_DIR, 'retdec-bin2pat') +PAT2YARA = os.path.join(INSTALL_BIN_DIR, 'retdec-pat2yara') +CONFIGTOOL = os.path.join(INSTALL_BIN_DIR, 'retdec-config') +EXTRACT = os.path.join(INSTALL_BIN_DIR, 'retdec-macho-extractor') +DECOMPILER_SH = os.path.join(INSTALL_BIN_DIR, 'retdec-decompiler.sh') +ARCHIVE_DECOMPILER_PY = os.path.join(INSTALL_BIN_DIR, 'retdec_archive_decompiler.py') +SIG_FROM_LIB_PY = os.path.join(INSTALL_BIN_DIR, 'retdec_signature_from_library_creator.py') +UNPACK_PY = os.path.join(INSTALL_BIN_DIR, 'retdec_unpacker.py') +LLVMIR2HLL = os.path.join(INSTALL_BIN_DIR, 'retdec-llvmir2hll') +BIN2LLVMIR = os.path.join(INSTALL_BIN_DIR, 'retdec-bin2llvmir') +IDA_COLORIZER = os.path.join(INSTALL_BIN_DIR, 'retdec-color-c.py') +UNPACKER = os.path.join(INSTALL_BIN_DIR, 'retdec-unpacker') diff --git a/scripts/retdec_decompiler.py b/scripts/retdec_decompiler.py old mode 100644 new mode 100755 index c0804f047..b17cf29f6 --- a/scripts/retdec_decompiler.py +++ b/scripts/retdec_decompiler.py @@ -16,7 +16,7 @@ from pathlib import Path import retdec_config as config -from retdec_utils import Utils, CmdRunner +from retdec_utils import Utils, CmdRunner, TimeMeasuredProcess from retdec_signature_from_library_creator import SigFromLib from retdec_unpacker import Unpacker @@ -58,6 +58,7 @@ def parse_args(): parser.add_argument('-m', '--mode', dest='mode', metavar='MODE', + default='bin', choices=['bin', 'll', 'raw'], help='Force the type of decompilation mode [bin|ll|raw]' '(default: ll if input\'s suffix is \'.ll\', bin otherwise).') @@ -65,7 +66,6 @@ def parse_args(): parser.add_argument('-o', '--output', dest='output', metavar='FILE', - default='file.ext', help='Output file.') parser.add_argument('-p', '--pdb', @@ -200,7 +200,7 @@ def parse_args(): help='Used renamer of variables [address|hungarian|readable|simple|unified]') parser.add_argument('--cleanup', - dest='self.cleanup', + dest='cleanup', help='Removes temporary files created during the decompilation.') parser.add_argument('--color-for-ida', @@ -245,12 +245,12 @@ def parse_args(): help='Decode only selected parts (functions/ranges). 
Faster decompilation, but worse results.') parser.add_argument('--select-functions', - dest='select_functions', + dest='selected_ranges', metavar='FUNCS', help='Specify a comma separated list of functions to decompile (example: fnc1,fnc2,fnc3).') parser.add_argument('--select-ranges', - dest='select_ranges', + dest='selected_ranges', metavar='RANGES', help='Specify a comma separated list of ranges to decompile ' '(example: 0x100-0x200,0x300-0x400,0x500-0x600).') @@ -294,6 +294,7 @@ def __init__(self, _args): self.output = '' self.config = '' + self.arch = '' self.out_unpacked = '' self.out_frontend_ll = '' self.out_frontend_bc = '' @@ -319,7 +320,7 @@ def check_arguments(self): # Try to detect desired decompilation mode if not set by user. # We cannot detect 'raw' mode because it overlaps with 'bin' (at least not based on extension). if not self.args.mode: - if Path(self.args.input).suffix == 'll': + if self.args.input.endswith('ll'): # Suffix .ll self.args.mode = 'll' else: @@ -367,48 +368,49 @@ def check_arguments(self): if self.args.ar_index: Utils.print_warning('Option --ar-index is not used in mode ' + self.args.mode) + fname = '' if not self.args.output: # No output file was given, so use the default one. - (iname, ext) = os.path.splitext(self.input) - - if ext == 'll': + fname = self.args.input + if fname.endswith('ll'): # Suffix .ll - self.output = iname + '.' + self.args.hll - elif ext == 'exe': + self.output = fname[:-2] + self.args.hll + elif fname.endswith('exe'): # Suffix .exe - self.output = iname + '.' + self.args.hll - elif ext == 'elf': + self.output = fname[:-3] + self.args.hll + print('Output is: ' + self.output) + elif fname.endswith('elf'): # Suffix .elf - self.output = iname + '.' + self.args.hll - elif ext == 'ihex': + self.output = fname[:-3] + self.args.hll + elif fname.endswith('ihex'): # Suffix .ihex - self.output = iname + '.' + self.args.hll - elif ext == 'macho': + self.output = fname[:-4] + self.args.hll + elif fname.endswith('macho'): # Suffix .macho - self.output = iname + '.' + self.args.hll + self.output = fname[:-5] + self.args.hll else: self.output = self.output + PICKED_FILE + '.' + self.args.hll - # If the output file name matches the input file name, we have to change the - # output file name. Otherwise, the input file gets overwritten. - if self.input == self.output: - self.output = iname + '.out.' + self.args.hll + # If the output file name matches the input file name, we have to change the + # output file name. Otherwise, the input file gets overwritten. + if self.args.input == self.output: + self.output = fname + '.out.' + self.args.hll - # Convert to absolute paths. - self.input = Utils.get_realpath(self.args.input) - self.output = Utils.get_realpath(self.args.output) + # Convert to absolute paths. + self.input = os.path.abspath(self.args.input) #Utils.get_realpath(self.args.input) + self.output = os.path.abspath(self.output) #Utils.get_realpath(self.output) - if os.path.exists(self.args.pdb): - self.args.pdb = Utils.get_realpath(self.args.pdb) + if self.args.pdb and os.path.exists(self.args.pdb): + self.args.pdb = Utils.get_realpath(self.args.pdb) - # Check that selected ranges are valid. - if self.args.selected_ranges: - for r in self.args.selected_ranges: - # Check if valid range. - if not Utils.is_range(r): - Utils.print_error( - 'Range %s in option --select-ranges is not a valid decimal (e.g. 123-456) or hexadecimal ' - '(e.g. 0x123-0xabc) range.' % r) + # Check that selected ranges are valid. 
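
The selected-ranges handling that follows leans on Utils.is_range plus a first-versus-second address comparison. As a rough, self-contained illustration of what such a check has to accept (decimal ranges like 123-456 or hexadecimal ones like 0x123-0xabc, with the second address no smaller than the first), here is a sketch; the helper name is made up and is not part of these scripts:

import re

_RANGE_RE = re.compile(r'^(0x[0-9a-fA-F]+|[0-9]+)-(0x[0-9a-fA-F]+|[0-9]+)$')

def _to_int(value):
    # Accept both plain decimal and 0x-prefixed hexadecimal addresses.
    return int(value, 16) if value.lower().startswith('0x') else int(value)

def is_valid_range(r):
    """Return True for 'first-last' ranges where first <= last."""
    m = _RANGE_RE.match(r)
    if not m:
        return False
    first, last = (_to_int(x) for x in m.groups())
    return first <= last

assert is_valid_range('123-456')
assert is_valid_range('0x123-0xabc')
assert not is_valid_range('0x200-0x100')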
+ if self.args.selected_ranges: + for r in self.args.selected_ranges: + # Check if valid range. + if not Utils.is_range(r): + Utils.print_error( + 'Range %s in option --select-ranges is not a valid decimal (e.g. 123-456) or hexadecimal ' + '(e.g. 0x123-0xabc) range.' % r) # Check if first <= last. ranges = self.args.selected_ranges.split('-') @@ -418,6 +420,9 @@ def check_arguments(self): 'Range \'%s\' in option --select-ranges is not a valid range: ' 'second address must be greater or equal than the first one.' % ranges) + if self.args.arch: + self.arch = self.args.arch + def print_warning_if_decompiling_bytecode(self): """Prints a warning if we are decompiling bytecode.""" @@ -453,48 +458,31 @@ def check_whether_decompilation_should_be_forcefully_stopped(self, tool_name): def cleanup(self): """Cleanup working directory""" - if self.args.self.cleanup: - Utils.remove_forced(self.out_unpacked) - Utils.remove_forced(self.out_frontend_ll) - Utils.remove_forced(self.out_frontend_bc) + if self.args.cleanup: + Utils.remove_dir_forced(self.out_unpacked) + Utils.remove_dir_forced(self.out_frontend_ll) + Utils.remove_dir_forced(self.out_frontend_bc) if self.config != self.args.config: - Utils.remove_forced(self.config) + Utils.remove_dir_forced(self.config) - Utils.remove_forced(self.out_backend_bc) - Utils.remove_forced(self.out_backend_ll) - Utils.remove_forced(self.out_restored) + Utils.remove_dir_forced(self.out_backend_bc) + Utils.remove_dir_forced(self.out_backend_ll) + Utils.remove_dir_forced(self.out_restored) # Archive support - Utils.remove_forced(self.out_archive) + Utils.remove_dir_forced(self.out_archive) # Archive support (Macho-O Universal) for sig in self.signatures_to_remove: - Utils.remove_forced(sig) + Utils.remove_dir_forced(sig) # Signatures generated from archives if self.TOOL_LOG_FILE: - Utils.remove_forced(self.TOOL_LOG_FILE) + Utils.remove_dir_forced(self.TOOL_LOG_FILE) def generate_log(self): - global LOG_DECOMPILATION_END_DATE - global LOG_FILEINFO_OUTPUT - global LOG_UNPACKER_OUTPUT - global LOG_BIN2LLVMIR_OUTPUT - global LOG_LLVMIR2HLL_OUTPUT - global LOG_DECOMPILATION_START_DATE - global FORMAT - global LOG_FILEINFO_RC - global LOG_UNPACKER_RC - global LOG_BIN2LLVMIR_RC - global LOG_LLVMIR2HLL_RC - global LOG_FILEINFO_RUNTIME - global LOG_BIN2LLVMIR_RUNTIME - global LOG_LLVMIR2HLL_RUNTIME - global LOG_FILEINFO_MEMORY - global LOG_BIN2LLVMIR_MEMORY - global LOG_LLVMIR2HLL_MEMORY - + """ LOG_FILE = self.output + '.decompilation.log' LOG_DECOMPILATION_END_DATE = time.strftime('%S') @@ -522,6 +510,7 @@ def generate_log(self): LOG_FILEINFO_OUTPUT, LOG_UNPACKER_OUTPUT, LOG_BIN2LLVMIR_OUTPUT, LOG_LLVMIR2HLL_OUTPUT, LOG_FILEINFO_RUNTIME, LOG_BIN2LLVMIR_RUNTIME, LOG_LLVMIR2HLL_RUNTIME, LOG_FILEINFO_MEMORY, LOG_BIN2LLVMIR_MEMORY, LOG_LLVMIR2HLL_MEMORY)) + """ # # Parses the given return code and output from a tool that was run through @@ -563,54 +552,11 @@ def get_tool_rc(self, return_code, output): return RC - # - # Parses the given output ($1) from a tool that was run through - # `/usr/bin/time -v` and prints the running time in seconds. - # - def get_tool_runtime(self, output): - global USER_TIME_F - global SYSTEM_TIME_F - global RUNTIME_F - - # The output from `/usr/bin/time -v` looks like this: - # - # [..] (output from the tool) - # Command being timed: 'tool' - # User time (seconds): 0.04 - # System time (seconds): 0.00 - # [..] (other data) - # - # We combine the user and system times into a single time in seconds. 
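
The `/usr/bin/time -v` format described here can be mined with a couple of regular expressions instead of the removed egrep/cut/bc pipelines. A sketch of that parsing, assuming the field labels that `/usr/bin/time -v` prints (the user and system times quoted above, plus the maximum resident set size used a little further down); note that the captured values come back as strings, so explicit float()/int() conversions are needed before any arithmetic:

import math
import re

def parse_time_v(output):
    """Extract runtime (user + system, in seconds) and peak RSS (in MB) from `/usr/bin/time -v` output."""
    def field(label):
        m = re.search(re.escape(label) + r':\s*([\d.]+)', output)
        return m.group(1) if m else '0'

    runtime = float(field('User time (seconds)')) + float(field('System time (seconds)'))
    runtime = int(math.ceil(runtime)) or 1                                    # report at least one second
    rss_mb = int(field('Maximum resident set size (kbytes)')) // 1024 or 1    # round small values up to 1 MB
    return runtime, rss_mb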
- USER_TIME_F = os.popen('egrep \'User time \\(seconds\\').read().rstrip('\n') + ': <<< ' + ( - output) + ' | cut -d: -f2)' - - SYSTEM_TIME_F = os.popen('egrep \'System time \\(seconds\\').read().rstrip('\n') + ': <<< ' + ( - output) + ' | cut -d: -f2)' - RUNTIME_F = os.popen('echo ' + USER_TIME_F + ' + ' + SYSTEM_TIME_F + ' | bc').read().rstrip('\n') - # Convert the runtime from float to int (http://unix.stackexchange.com/a/89843). - # By adding 1, we make sure that the runtime is at least one second. This - # also takes care of proper rounding (we want to round runtime 1.1 to 2). - _rc0 = _rcr2, _rcw2 = os.pipe() - if os.fork(): - os.close(_rcw2) - os.dup2(_rcr2, 0) - subprocess.call(['bc'], shell=True) - else: - os.close(_rcr2) - os.dup2(_rcw2, 1) - print('(' + RUNTIME_F + ' + 1)/1') - # sys.exit(0) - - return RUNTIME_F - # # Parses the given output ($1) from a tool that was run through # `/usr/bin/time -v` and prints the memory usage in MB. # def get_tool_memory_usage(self, tool): - global RSS_KB - global RSS_MB - """The output from `/usr/bin/time -v` looks like this: [..] (output from the tool) @@ -622,47 +568,11 @@ def get_tool_memory_usage(self, tool): We want the value of 'resident set size' (RSS), which we convert from KB to MB. If the resulting value is less than 1 MB, round it to 1 MB. """ - RSS_KB = os.popen('egrep \'Maximum resident set size \\(kbytes\\').read().rstrip('\n') + ': <<< ' + ( - tool) + ' | cut -d: -f2)' - - RSS_MB = (RSS_KB // 1024) - print((RSS_MB if (RSS_MB > 0) else 1)) - - return RSS_MB + _, _, tail = tool.partition('Maximum resident set size (kbytes): ') + rss_kb = tail.split(' ')[0] + rss_mb = (rss_kb / 1024) - # - # Prints the actual output of a tool that was run through `/usr/bin/time -v`. - # The parameter ($1) is the combined output from the tool and `/usr/bin/time -v`. - # - def get_tool_output(self, output): - # The output from `/usr/bin/time -v` looks either like this (success): - # - # [..] (output from the tool) - # Command being timed: 'tool' - # [..] (other data) - # - # or like this (when there was an error): - # - # [..] (output from the tool) - # Command exited with non-zero status X - # [..] (other data) - # - # Remove everything after and including 'Command...' - # (http://stackoverflow.com/a/5227429/2580955). - _rcr1, _rcw1 = os.pipe() - if os.fork(): - os.close(_rcw1) - os.dup2(_rcr1, 0) - subprocess.call(['sed', '-n', '/Command exited with non-zero status/q;p'], shell=True) - else: - os.close(_rcr1) - os.dup2(_rcw1, 1) - _rc0 = subprocess.Popen('sed' + ' ' + '-n' + ' ' + '/Command being timed:/q;p', shell=True, - stdin=subprocess.PIPE) - _rc0.communicate(output + '\n') - - return _rc0.wait() - # sys.exit(0) + return rss_mb if (rss_mb > 0) else 1 # # Prints an escaped version of the given text so it can be inserted into JSON. @@ -691,9 +601,7 @@ def timed_kill(self, pid): 0 otherwise """ - global TIMEOUT - global timeout - + """ PID = pid # PID of the target process PROCESS_NAME = os.popen('ps -p ' + PID + ' -o comm --no-heading').read().rstrip('\n') @@ -705,11 +613,7 @@ def timed_kill(self, pid): # usage etc.). 
PID = os.popen('ps --ppid ' + PID + ' -o pid --no-heading | head -n1').read().rstrip('\n') - if not TIMEOUT: - TIMEOUT = 300 - - timeout = TIMEOUT - t = timeout + t = self.timeout while t > 0: time.sleep(1) @@ -723,6 +627,7 @@ def timed_kill(self, pid): subprocess.call(['kill_tree', PID, 'SIGKILL'], shell=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) + """ return 0 # @@ -761,7 +666,7 @@ def decompile(self): # Initialize variables used by logging. if self.args.generate_log: - LOG_DECOMPILATION_START_DATE = time.strftime('%s') # os.popen('date + %s').read().rstrip('\n') + log_decompilation_start_date = time.strftime('%s') # os.popen('date + %s').read().rstrip('\n') # Put the tool log file and tmp file into /tmp because it uses tmpfs. This means that # the data are stored in RAM instead on the disk, which should provide faster access. tmp_dir = '/tmp/decompiler_log' @@ -786,18 +691,17 @@ def decompile(self): if Utils.is_macho_archive(self.input): out_archive = self.output + '.a' - if self.args.arch: + if self.arch: print() print('##### Restoring static library with architecture family ' + self.args.arch + '...') print( 'RUN: ' + config.EXTRACT + ' --family ' + self.args.arch + ' --out ' + out_archive + ' ' + self.input) - if ( - not subprocess.call( - [config.EXTRACT, '--family', self.args.arch, '--out', out_archive, self.input], - shell=True)): + if (not subprocess.call( + [config.EXTRACT, '--family', self.args.arch, '--out', out_archive, self.input], + shell=True)): # Architecture not supported - print( - 'Invalid --arch option \'' + self.args.arch + '\'. File contains these architecture families:') + print('Invalid --arch option \'' + self.args.arch + + '\'. File contains these architecture families:') subprocess.call([config.EXTRACT, '--list', self.input], shell=True) self.cleanup() sys.exit(1) @@ -807,6 +711,7 @@ def decompile(self): print('##### Restoring best static library for decompilation...') print('RUN: ' + config.EXTRACT + ' --best --out ' + out_archive + ' ' + self.input) subprocess.call([config.EXTRACT, '--best', '--out', out_archive, self.input], shell=True) + self.input = out_archive print() @@ -820,17 +725,20 @@ def decompile(self): if Utils.has_thin_archive_signature(self.input): self.cleanup() Utils.print_error('File is a thin archive and cannot be decompiled.') + return # Check if our tools can handle it. if not Utils.is_valid_archive(self.input): self.cleanup() Utils.print_error('The input archive has invalid format.') + return # Get and check number of objects. arch_object_count = Utils.archive_object_count(self.input) if arch_object_count <= 0: self.cleanup() Utils.print_error('The input archive is empty.') + return # Prepare object output path. out_restored = self.output + '.restored' @@ -847,12 +755,15 @@ def decompile(self): valid_index = (arch_object_count - 1) if valid_index != 0: - Utils.print_error('File on index \'' + ( - self.args.ar_index) + '\' was not found in the input archive. Valid indexes are 0-' + ( + Utils.print_error('File on index \'' + self.args.ar_index + + '\' was not found in the input archive. Valid indexes are 0-' + ( valid_index) + '.') + return else: - Utils.print_error('File on index \'' + ( - self.args.ar_index) + '\' was not found in the input archive. The only valid index is 0.') + Utils.print_error('File on index \'' + self.args.ar_index + + '\' was not found in the input archive. 
The only valid index is 0.') + return + self.input = out_restored elif self.args.ar_name: print() @@ -890,8 +801,10 @@ def decompile(self): self.out_frontend_bc = out_frontend + '.bc' self.config = self.output + '.json' + print('Name is: ' + self.config) + if self.config != self.args.config: - Utils.remove_forced(self.config) + Utils.remove_file_forced(self.config) if self.args.config: shutil.copyfile(self.args.config, self.config) @@ -900,15 +813,18 @@ def decompile(self): if os.path.isfile(self.config): subprocess.call([config.CONFIGTOOL, self.config, '--preprocess'], shell=True) else: - print('{}', file=open(self.config, 'wb')) + with open(self.config, 'w') as f: + f.write('{}') # Raw data needs architecture, endianess and optionaly sections's vma and entry point to be specified. if self.args.mode == 'raw': if not self.args.arch or self.args.arch == 'unknown' or self.args.arch == '': Utils.print_error('Option -a|--arch must be used with mode ' + self.args.mode) + return if not self.args.endian: Utils.print_error('Option -e|--endian must be used with mode ' + self.args.mode) + return subprocess.call([config.CONFIGTOOL, self.config, '--write', '--format', 'raw'], shell=True) subprocess.call([config.CONFIGTOOL, self.config, '--write', '--arch', self.args.arch], shell=True) @@ -922,9 +838,8 @@ def decompile(self): shell=True) if self.args.raw_section_vma: - subprocess.call( - [config.CONFIGTOOL, self.config, '--write', '--section-vma', self.args.raw_section_vma], - shell=True) + subprocess.call([config.CONFIGTOOL, self.config, '--write', '--section-vma', + self.args.raw_section_vma], shell=True) # # Call fileinfo to create an initial config file. @@ -952,19 +867,20 @@ def decompile(self): print('##### Gathering file information...') print('RUN: ' + config.FILEINFO + ' ' + ' '.join(fileinfo_params)) - if self.args.generate_log != '': - FILEINFO_AND_TIME_OUTPUT = os.popen(TIME + ' \'' + config.FILEINFO + '\' \'' - + ' '.join(fileinfo_params) + '\' 2>&1').read().rstrip('\n') + fileinfo_rc = 0 - fileinfo_rc = 0 # _rc0 #TODO use fileinfo rc - LOG_FILEINFO_RC = self.get_tool_rc(fileinfo_rc, FILEINFO_AND_TIME_OUTPUT) + if self.args.generate_log: + """ + tcmd = TimeMeasuredProcess() + LOG_FILEINFO_OUTPUT, fileinfo_rc, LOG_FILEINFO_RUNTIME = \ + tcmd.run_cmd([config.FILEINFO] + fileinfo_params) - LOG_FILEINFO_RUNTIME = self.get_tool_runtime(FILEINFO_AND_TIME_OUTPUT) - LOG_FILEINFO_MEMORY = self.get_tool_memory_usage(FILEINFO_AND_TIME_OUTPUT) - LOG_FILEINFO_OUTPUT = self.get_tool_output(FILEINFO_AND_TIME_OUTPUT) + LOG_FILEINFO_MEMORY = self.get_tool_memory_usage(LOG_FILEINFO_OUTPUT) print(LOG_FILEINFO_OUTPUT) + """ + pass else: - fileinfo_rc = subprocess.call([config.FILEINFO, ' '.join(fileinfo_params)], shell=True) + fileinfo_rc = subprocess.call([config.FILEINFO] + fileinfo_params, shell=True) if fileinfo_rc != 0: if self.args.generate_log: @@ -1030,21 +946,26 @@ def decompile(self): print('RUN: ' + config.FILEINFO + ' ' + ' '.join(fileinfo_params)) if self.args.generate_log: + """ FILEINFO_AND_TIME_OUTPUT = os.popen( TIME + ' \'' + config.FILEINFO + '\' \'' + ' '.join(fileinfo_params) + '\' 2>&1').read().rstrip( '\n') fileinfo_rc = 0 # _rc0 - LOG_FILEINFO_RC = self.get_tool_rc(fileinfo_rc, FILEINFO_AND_TIME_OUTPUT) - FILEINFO_RUNTIME = self.get_tool_runtime(FILEINFO_AND_TIME_OUTPUT) + tcmd = TimeMeasuredProcess() + LOG_FILEINFO_OUTPUT, fileinfo_rc, LOG_FILEINFO_RUNTIME = \ + tcmd.run_cmd([config.FILEINFO] + fileinfo_params) + LOG_FILEINFO_RUNTIME = (LOG_FILEINFO_RUNTIME + FILEINFO_RUNTIME) 
FILEINFO_MEMORY = self.get_tool_memory_usage(FILEINFO_AND_TIME_OUTPUT) LOG_FILEINFO_MEMORY = (LOG_FILEINFO_MEMORY + FILEINFO_MEMORY) / 2 LOG_FILEINFO_OUTPUT = self.get_tool_output(FILEINFO_AND_TIME_OUTPUT) print(LOG_FILEINFO_OUTPUT) + """ + pass else: - fileinfo_rc = subprocess.call([config.FILEINFO, ' '.join(fileinfo_params)], shell=True) + fileinfo_rc = subprocess.call([config.FILEINFO] + fileinfo_params, shell=True) if fileinfo_rc != 0: if self.args.generate_log: @@ -1061,38 +982,38 @@ def decompile(self): subprocess.call([config.CONFIGTOOL, self.config, '--write', '--arch', self.args.arch], shell=True) else: # Get full name of the target architecture including comments in parentheses - ARCH_FULL = os.popen( - '\'' + config.CONFIGTOOL + '\' \'' + self.config + '\' --read --arch | awk \'{print tolower($0').read().rstrip( - '\n') + arch_full = os.popen(config.CONFIGTOOL + ' ' + self.config + ' --read --arch').read().rstrip('\n') + arch_full = arch_full.lower() # Strip comments in parentheses and all trailing whitespace # todo (ARCH_FULL % (*) what is this - self.args.arch = ARCH_FULL # os.popen('echo ' + (ARCH_FULL % (*) + ' | sed -e s / ^ [[: space:]] * // \'').read().rstrip('\n') + self.arch = arch_full # os.popen('echo ' + (ARCH_FULL % (*) + ' | sed -e s / ^ [[: space:]] * // \'').read().rstrip('\n') # Get object file format. - FORMAT = os.popen( - '\'' + config.CONFIGTOOL + '\' \'' + self.config + '\' --read --format | awk \'{print tolower($1').read().rstrip( - '\n') + format = os.popen(config.CONFIGTOOL + ' ' + self.config + ' --read --format').read().rstrip('\n') + format = format.lower() # Intel HEX needs architecture to be specified - if FORMAT == 'ihex': - if not self.args.arch or self.args.arch == 'unknown': - Utils.print_error('Option -a|--arch must be used with format ' + FORMAT) + if format == 'ihex': + if not self.arch or self.arch == 'unknown': + Utils.print_error('Option -a|--arch must be used with format ' + format) + return if not self.args.endian: - Utils.print_error('Option -e|--endian must be used with format ' + FORMAT) + Utils.print_error('Option -e|--endian must be used with format ' + format) + return - subprocess.call([config.CONFIGTOOL, self.config, '--write', '--arch', self.args.arch], shell=True) + subprocess.call([config.CONFIGTOOL, self.config, '--write', '--arch', self.arch], shell=True) subprocess.call([config.CONFIGTOOL, self.config, '--write', '--bit-size', '32'], shell=True) subprocess.call([config.CONFIGTOOL, self.config, '--write', '--file-class', '32'], shell=True) subprocess.call([config.CONFIGTOOL, self.config, '--write', '--endian', self.args.endian], shell=True) # Check whether the correct target architecture was specified. - if self.args.arch == 'arm' or self.args.arch == 'thumb': + if self.arch == 'arm' or self.arch == 'thumb': ords_dir = config.ARM_ORDS_DIR - elif self.args.arch == 'x86': + elif self.arch == 'x86': ords_dir = config.X86_ORDS_DIR - elif self.args.arch == 'powerpc' or self.args.arch == 'mips' or self.args.arch == 'pic32': + elif self.arch == 'powerpc' or self.arch == 'mips' or self.arch == 'pic32': pass else: # nothing @@ -1101,7 +1022,8 @@ def decompile(self): self.cleanup() Utils.print_error( - 'Unsupported target architecture %s. Supported architectures: Intel x86, ARM, ARM + Thumb, MIPS, PIC32, PowerPC.' % self.args.arch) + 'Unsupported target architecture %s. Supported architectures: Intel x86, ARM, ARM + Thumb, MIPS, PIC32, PowerPC.' % self.arch) + return # Check file class (e.g. 'ELF32', 'ELF64'). 
At present, we can only decompile 32-bit files. # Note: we prefer to report the 'unsupported architecture' error (above) than this 'generic' error. @@ -1115,12 +1037,14 @@ def decompile(self): self.cleanup() Utils.print_error( - 'Unsupported target format '' + (FORMAT^^) + (FILECLASS) + ''. Supported formats: ELF32, PE32, Intel HEX 32, Mach-O 32.') + 'Unsupported target format \'%s%s\'. Supported formats: ELF32, PE32, Intel HEX 32, Mach-O 32.' % ( + format, fileclass)) + return # Set path to statically linked code signatures. # # TODO: Using ELF for IHEX is ok, but for raw, we probably should somehow decide between ELF and PE, or use both, for RAW. - sig_format = FORMAT + sig_format = format if sig_format == 'ihex' or sig_format == 'raw': sig_format = 'elf' @@ -1135,12 +1059,12 @@ def decompile(self): else: sig_endian = '' - sig_arch = self.args.arch + sig_arch = self.arch if sig_arch == 'pic32': sig_arch = 'mips' - signatures_dir = os.path.join(config.GENERIC_SIGNATURES_DIR, sig_format, fileclass, sig_endian) + signatures_dir = os.path.join(config.GENERIC_SIGNATURES_DIR, sig_format, fileclass, sig_endian, sig_arch) # SIGNATURES_DIR = config.GENERIC_SIGNATURES_DIR + '/' + SIG_FORMAT + '/' + FILECLASS + '/' + SIG_ENDIAN + '/' + ( # SIG_ARCH) @@ -1151,31 +1075,32 @@ def decompile(self): subprocess.call([config.CONFIGTOOL, self.config, '--write', '--keep-unreachable-funcs', 'true'], shell=True) - # Get signatures from selected archives. - if len(self.args.static_code_archive) > 0: - print() - print('##### Extracting signatures from selected archives...') - - lib_index = 0 - for lib in self.args.static_code_archive: - - print('Extracting signatures from file \'%s\'', lib) - CROP_ARCH_PATH = os.popen( - 'basename \'' + lib + '\' | LC_ALL=C sed -e \'s/[^A-Za-z0-9_.-]/_/g\'').read().rstrip('\n') - sig_out = self.output + '.' + CROP_ARCH_PATH + '.' + lib_index + '.yara' - - # if (subprocess.call(config.SIG_FROM_LIB + ' ' + lib + ' ' + '--output' + ' ' + SIG_OUT, shell=True, - # stderr=subprocess.STDOUT, stdout=subprocess.DEVNULL)): - # Call sig from lib tool - sig_from_lib = SigFromLib([lib, '--output ' + sig_out]) - if sig_from_lib.run(): - subprocess.call([config.CONFIGTOOL, self.config, '--write', '--user-signature', sig_out], - shell=True) - signatures_to_remove = [sig_out] - else: - Utils.print_warning('Failed extracting signatures from file \'' + lib + '\'') + if self.args.static_code_archive is not None: + # Get signatures from selected archives. + if len(self.args.static_code_archive) > 0: + print() + print('##### Extracting signatures from selected archives...') + + lib_index = 0 + for lib in self.args.static_code_archive: + + print('Extracting signatures from file \'%s\'', lib) + CROP_ARCH_PATH = os.popen( + 'basename \'' + lib + '\' | LC_ALL=C sed -e \'s/[^A-Za-z0-9_.-]/_/g\'').read().rstrip('\n') + sig_out = self.output + '.' + CROP_ARCH_PATH + '.' + lib_index + '.yara' + + # if (subprocess.call(config.SIG_FROM_LIB + ' ' + lib + ' ' + '--output' + ' ' + SIG_OUT, shell=True, + # stderr=subprocess.STDOUT, stdout=subprocess.DEVNULL)): + # Call sig from lib tool + sig_from_lib = SigFromLib([lib, '--output ' + sig_out]) + if sig_from_lib.run(): + subprocess.call([config.CONFIGTOOL, self.config, '--write', '--user-signature', sig_out], + shell=True) + self.signatures_to_remove = [sig_out] + else: + Utils.print_warning('Failed extracting signatures from file \'' + lib + '\'') - lib_index += 1 + lib_index += 1 # Store paths of signature files into config for frontend. 
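
The signature-extraction loop above still shells out for `basename ... | sed 's/[^A-Za-z0-9_.-]/_/g'`, and sig_out is built by concatenating the integer lib_index directly into a string, which Python rejects with a TypeError. Both steps have simple pure-Python equivalents; a sketch (the helper is illustrative, not part of the scripts):

import os
import re

def signature_path(output, lib, lib_index):
    """Mirror `basename "$lib" | sed 's/[^A-Za-z0-9_.-]/_/g'` without spawning a shell."""
    crop = re.sub(r'[^A-Za-z0-9_.-]', '_', os.path.basename(lib))
    # lib_index is an int, so format it explicitly instead of concatenating strings.
    return '%s.%s.%d.yara' % (output, crop, lib_index)

# signature_path('/tmp/out.c', '/libs/libc++.a', 0) -> '/tmp/out.c.libc__.a.0.yara'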
if not self.args.no_default_static_signatures: @@ -1234,18 +1159,17 @@ def decompile(self): if self.out_frontend_ll == out_backend + '.ll': out_backend = self.output + '.backend.backend' - out_backend_bc = out_backend + '.bc' - OUT_BACKEND_LL = out_backend + '.ll' + self.out_backend_bc = out_backend + '.bc' + self.out_backend_ll = out_backend + '.ll' - bin2llvmir_params = [] ## ## Decompile the binary into LLVM IR. ## + bin2llvmir_params = config.BIN2LLVMIR_PARAMS + if self.args.keep_unreachable_funcs: # Prevent bin2llvmir from removing unreachable functions. - bin2llvmir_params = os.popen( - 'sed \' s / -unreachable-funcs * // g\' <<< \'' + config.BIN2LLVMIR_PARAMS + '\'').read().rstrip( - '\n') + bin2llvmir_params.remove('-unreachable-funcs') if not self.config and self.args.config: self.config = self.args.config @@ -1259,16 +1183,19 @@ def decompile(self): bin2llvmir_params.append('-max-memory-half-ram') print() - print('##### Decompiling ' + self.input + ' into ' + out_backend_bc + '...') - print('RUN: ' + config.BIN2LLVMIR + ' ' + ' '.join(bin2llvmir_params) + ' -o ' + out_backend_bc) + print('##### Decompiling ' + self.input + ' into ' + self.out_backend_bc + '...') + print('RUN: ' + config.BIN2LLVMIR + ' ' + ' '.join(bin2llvmir_params) + ' -o ' + self.out_backend_bc) + + bin2llvmir_rc = 0 if self.args.generate_log: + """ PID = 0 bin2llvmir_rc = 0 def thread1(): - subprocess.call([TIME, config.BIN2LLVMIR, ' '.join(bin2llvmir_params), '-o', ' '.join( - out_backend_bc)], shell=True, stdout=open(tool_log_file, 'wb'), stderr=subprocess.STDOUT) + subprocess.call([TIME, config.BIN2LLVMIR, ' '.join(bin2llvmir_params), '-o', + self.out_backend_bc], shell=True, stdout=open(tool_log_file, 'wb'), stderr=subprocess.STDOUT) threading.Thread(target=thread1).start() @@ -1289,9 +1216,11 @@ def thread2(): LOG_BIN2LLVMIR_MEMORY = self.get_tool_memory_usage(BIN2LLVMIR_AND_TIME_OUTPUT) LOG_BIN2LLVMIR_OUTPUT = self.get_tool_output(BIN2LLVMIR_AND_TIME_OUTPUT) print(LOG_BIN2LLVMIR_OUTPUT, end='') + """ else: - bin2llvmir_rc = subprocess.call([config.BIN2LLVMIR, ' '.join(bin2llvmir_params), '-o', out_backend_bc], - shell=True) + bin2llvmir_rc = subprocess.call( + [config.BIN2LLVMIR, ' '.join(bin2llvmir_params), '-o', self.out_backend_bc], + shell=True) if bin2llvmir_rc != 0: if self.args.generate_log: @@ -1306,7 +1235,7 @@ def thread2(): # LL mode goes straight to backend. if self.args.mode == 'll': - out_backend_bc = self.input + self.out_backend_bc = self.input self.config = self.args.config # Create parameters for the $LLVMIR2HLL call. @@ -1315,7 +1244,7 @@ def thread2(): self.args.backend_call_info_obtainer), '-arithm-expr-evaluator=' + ( self.args.backend_arithm_expr_evaluator), '-validate-module', '-llvmir2bir-converter=' + ( - self.args.backend_llvmir2bir_converter), '-o', self.output, out_backend_bc] + self.args.backend_llvmir2bir_converter), '-o', self.output, self.out_backend_bc] if self.args.backend_no_debug: llvmir2hll_params.append('-enable-debug') @@ -1386,10 +1315,13 @@ def thread2(): # Decompile the optimized IR code. print() - print('##### Decompiling ' + out_backend_bc + ' into ' + self.output + '...') + print('##### Decompiling ' + self.out_backend_bc + ' into ' + self.output + '...') print('RUN: ' + config.LLVMIR2HLL + ' ' + ' '.join(llvmir2hll_params)) + llvmir2hll_rc = 0 + if self.args.generate_log: + """ PID = 0 def thread3(): @@ -1418,15 +1350,16 @@ def thread4(): print(LOG_LLVMIR2HLL_OUTPUT) # Wait a bit to ensure that all the memory that has been assigned to the tool was released. 
time.sleep(0.1) + """ else: - llvmir2hll_rc = subprocess.call([config.LLVMIR2HLL, ' '.join(llvmir2hll_params)], shell=True) + llvmir2hll_rc = subprocess.call([config.LLVMIR2HLL] + llvmir2hll_params, shell=True) if llvmir2hll_rc != 0: if self.args.generate_log: self.generate_log() self.cleanup() - Utils.print_error('Decompilation of file %s failed' % out_backend_bc) + Utils.print_error('Decompilation of file %s failed' % self.out_backend_bc) self.check_whether_decompilation_should_be_forcefully_stopped('llvmir2hll') @@ -1457,11 +1390,13 @@ def thread4(): # Note: Do not use the -i flag (in-place replace) as there is apparently no way # of getting sed -i to work consistently on both MacOS and Linux. # TODO - _rc4 = subprocess.call( - 'sed' + ' ' + '-e' + ' ' + ':a' + ' ' + '-e' + ' ' + '/^\\n*$/{$d;N;};/\\n$/ba' + ' ' + '-e' + ' ' + 's/[[:space:]]*$//', - shell=True, stdin=open(self.output, 'rb'), stdout=open(self.output + '.tmp', 'wb')) + with open(self.output, 'r') as file: + new = [line.rstrip() for line in file] + + with open(self.output, 'w') as fh: + [fh.write('%s\n' % line) for line in new] - shutil.move(self.output + '.tmp', self.output) + # shutil.move(self.output + '.tmp', self.output) # Colorize output file. if self.args.color_for_ida: diff --git a/scripts/retdec_signature_from_library_creator.py b/scripts/retdec_signature_from_library_creator.py index 89fea8b59..5dc116694 100644 --- a/scripts/retdec_signature_from_library_creator.py +++ b/scripts/retdec_signature_from_library_creator.py @@ -69,7 +69,7 @@ def print_error_and_cleanup(self, message): # Cleanup. if not self.args.no_cleanup: - Utils.remove_forced(self.tmp_dir_path) + Utils.remove_dir_forced(self.tmp_dir_path) Utils.print_error(message + '.') @@ -165,7 +165,7 @@ def run(self): # Do cleanup. if not self.args.no_cleanup: - Utils.remove_forced(self.tmp_dir_path) + Utils.remove_dir_forced(self.tmp_dir_path) return result diff --git a/scripts/retdec_unpacker.py b/scripts/retdec_unpacker.py index b7b4778ee..46ff367e5 100644 --- a/scripts/retdec_unpacker.py +++ b/scripts/retdec_unpacker.py @@ -1,4 +1,4 @@ -#! /usr/bin/env python3 +#!/usr/bin/env python3 """ The script tries to unpack the given executable file by using any @@ -27,11 +27,11 @@ from retdec_utils import Utils -def parse_args(): +def parse_args(_args): parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument('input', + parser.add_argument('file', metavar='FILE', help='The input file.') @@ -54,7 +54,7 @@ def parse_args(): action='store_true', help='Limit the maximal memory of retdec-unpacker to half of system RAM.') - return parser.parse_args() + return parser.parse_args(_args) class Unpacker: @@ -77,7 +77,7 @@ class Unpacker: UNPACKER_EXIT_CODE_PREPROCESSING_ERROR = 3 def __init__(self, _args): - self.args = _args + self.args = parse_args(_args) self.input = '' self.output = '' @@ -86,17 +86,17 @@ def _check_arguments(self): """ # Check whether the input file was specified. - if self.args.input is None: + if self.args.file is None: Utils.print_error('No input file was specified') return False - if not os.access(self.args.input, os.R_OK): - Utils.print_error('The input file %s does not exist or is not readable' % self.args.input) + if not os.access(self.args.file, os.R_OK): + Utils.print_error('The input file %s does not exist or is not readable' % self.args.file) return False # Conditional initialization. 
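
The new parse_args(_args) signature in the unpacker (and, in the following patch, in the archive decompiler) is what lets other scripts drive these classes with an ordinary Python list instead of re-spawning them as child processes. A minimal sketch of the pattern, with the parser reduced to two of the unpacker's options:

import argparse

def parse_args(_args):
    parser = argparse.ArgumentParser()
    parser.add_argument('file', metavar='FILE')
    parser.add_argument('-o', '--output', dest='output')
    # parse_args(None) falls back to sys.argv[1:], so command-line use keeps working.
    return parser.parse_args(_args)

class Unpacker:
    def __init__(self, _args):
        self.args = parse_args(_args)

# Programmatic use:   Unpacker(['packed.exe', '-o', 'packed.exe-unpacked'])
# Command-line use:   Unpacker(sys.argv[1:])  or  Unpacker(None)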
if not self.args.output: - self.output = self.args.input + '-unpacked' + self.output = self.args.file + '-unpacked' else: self.output = self.args.output @@ -111,8 +111,8 @@ def _check_arguments(self): return False # Convert to absolute paths. - self.input = Utils.get_realpath(self.args.input) - self.output = Utils.get_realpath(self.output) + self.input = os.path.abspath(self.args.file) #Utils.get_realpath(self.args.input) + self.output = os.path.abspath(self.output) #Utils.get_realpath(self.output) return True diff --git a/scripts/retdec_utils.py b/scripts/retdec_utils.py index 1208d0d94..905072d9f 100644 --- a/scripts/retdec_utils.py +++ b/scripts/retdec_utils.py @@ -9,6 +9,7 @@ import signal import subprocess import sys +from timeit import Timer import retdec_config as config @@ -160,15 +161,43 @@ def kill(self): subprocess.call(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) +class TimeMeasuredProcess: + + def __init__(self): + self.output = '' + self.rc = 0 + + def run_cmd(self, args): + """ + + :param args: + :return: (output, return_code, time) + """ + + def runProcess(): + cmd = CmdRunner() + + self.output, self.rc, _ = cmd.run_cmd(args) + + t = Timer(runProcess) + + return self.output, self.rc, t.timeit(1) + + class Utils: @staticmethod - def remove_forced(path): + def remove_file_forced(file): + if os.path.exists(file): + os.remove(file) + + @staticmethod + def remove_dir_forced(path): if os.path.exists(path): for n in os.listdir(path): p = os.path.join(path, n) if os.path.isdir(p): - shutil.rmtree(p) + shutil.rmtree(p, ignore_errors=True) else: os.unlink(p) @@ -309,7 +338,7 @@ def is_macho_archive(path): 1 if file is not archive """ return subprocess.call([config.EXTRACT, '--check-archive', path], shell=True, - stderr=subprocess.STDOUT, stdout=None) + stderr=subprocess.STDOUT, stdout=subprocess.DEVNULL) != 2 @staticmethod def is_decimal_number(num): From 2d84524f0b5eaa37f2613df9cdd888be17618950 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20K=C3=B6sel?= Date: Sat, 16 Jun 2018 09:01:21 +0200 Subject: [PATCH 10/48] Running until calling bin2llvmir now --- scripts/retdec_archive_decompiler.py | 6 +- scripts/retdec_config.py | 4 +- scripts/retdec_decompiler.py | 81 +++++++++++-------- .../retdec_signature_from_library_creator.py | 6 +- 4 files changed, 57 insertions(+), 40 deletions(-) diff --git a/scripts/retdec_archive_decompiler.py b/scripts/retdec_archive_decompiler.py index 12bfeb1de..f7b5ffdda 100644 --- a/scripts/retdec_archive_decompiler.py +++ b/scripts/retdec_archive_decompiler.py @@ -11,7 +11,7 @@ from retdec_utils import CmdRunner -def parse_args(): +def parse_args(_args): parser = argparse.ArgumentParser(description='Runs the decompilation script with the given optional arguments over' ' all files in the given static library or prints list of files in' ' plain text with --plain argument or in JSON format with' @@ -40,12 +40,12 @@ def parse_args(): dest="arg_list", help="args passed to the decompiler") - return parser.parse_args() + return parser.parse_args(_args) class ArchiveDecompiler: def __init__(self, _args): - self.args = _args + self.args = parse_args(_args) self.decompiler_sh_args = '' self.timeout = 300 diff --git a/scripts/retdec_config.py b/scripts/retdec_config.py index 4304b766c..7fe575d04 100644 --- a/scripts/retdec_config.py +++ b/scripts/retdec_config.py @@ -45,11 +45,11 @@ '-lower-expect', '-targetlibinfo', '-tbaa', '-basicaa', '-globalopt', '-mem2reg', '-instcombine', '-simplifycfg', '-basiccg', '-domtree', '-early-cse', 
'-lazy-value-info', '-jump-threading', '-correlated-propagation', '-simplifycfg', '-instcombine', '-simplifycfg', '-reassociate', '-domtree', - '-loops -loop-simplify', '-lcssa', '-loop-rotate', '-licm', '-lcssa', '-instcombine', '-scalar-evolution', + '-loops', '-loop-simplify', '-lcssa', '-loop-rotate', '-licm', '-lcssa', '-instcombine', '-scalar-evolution', '-loop-simplifycfg', '-loop-simplify', '-aa', '-loop-accesses', '-loop-load-elim', '-lcssa', '-indvars', '-loop-idiom', '-loop-deletion', '-memdep', '-gvn', '-memdep', '-sccp', '-instcombine', '-lazy-value-info', '-jump-threading', '-correlated-propagation', '-domtree', '-memdep', '-dse', '-dce', '-bdce', '-adce', '-die', - '-simplifycfg -instcombine -strip-dead-prototypes -globaldce -constmerge -constprop' + '-simplifycfg', '-instcombine', '-strip-dead-prototypes', '-globaldce', '-constmerge', '-constprop', '-instnamer', '-domtree', '-instcombine'] BIN2LLVMIR_PARAMS = ['-provider-init', '-decoder', '-verify', '-main-detection', '-idioms-libgcc', '-inst-opt', '-register', diff --git a/scripts/retdec_decompiler.py b/scripts/retdec_decompiler.py index b17cf29f6..922cfcd9c 100755 --- a/scripts/retdec_decompiler.py +++ b/scripts/retdec_decompiler.py @@ -10,15 +10,12 @@ import shutil import subprocess import sys -import threading import time -from datetime import date -from pathlib import Path import retdec_config as config -from retdec_utils import Utils, CmdRunner, TimeMeasuredProcess from retdec_signature_from_library_creator import SigFromLib from retdec_unpacker import Unpacker +from retdec_utils import Utils, CmdRunner def parse_args(): @@ -241,11 +238,11 @@ def parse_args(): help='Virtual address where section created from the raw binary will be placed') parser.add_argument('--select-decode-only', - dest='select_decode_only', + dest='selected_decode_only', help='Decode only selected parts (functions/ranges). Faster decompilation, but worse results.') parser.add_argument('--select-functions', - dest='selected_ranges', + dest='selected_functions', metavar='FUNCS', help='Specify a comma separated list of functions to decompile (example: fnc1,fnc2,fnc3).') @@ -316,6 +313,7 @@ def check_arguments(self): # Check whether the input file was specified. if not self.args.input: Utils.print_error('No input file was specified') + return False # Try to detect desired decompilation mode if not set by user. # We cannot detect 'raw' mode because it overlaps with 'bin' (at least not based on extension). @@ -336,30 +334,40 @@ def check_arguments(self): if not self.args.config or not self.args.no_config: Utils.print_error('Option --config or --no-config must be specified in mode ' + self.args.mode) + return False + elif self.args.mode == 'raw': # Errors -- missing critical arguments. if not self.args.arch: Utils.print_error('Option -a|--arch must be used with mode ' + self.args.mode) + return False if not self.args.endian: Utils.print_error('Option -e|--endian must be used with mode ' + self.args.mode) + return False if not self.args.raw_entry_point: Utils.print_error('Option --raw-entry-point must be used with mode ' + self.args.mode) + return False if not self.args.raw_section_vma: Utils.print_error('Option --raw-section-vma must be used with mode ' + self.args.mode) + return False if not Utils.is_number(self.args.raw_entry_point): Utils.print_error( 'Value in option --raw-entry-point must be decimal (e.g. 123) or hexadecimal value (e.g. 
0x123)') + return False + if not Utils.is_number(self.args.raw_section_vma): Utils.print_error( 'Value in option --raw-section-vma must be decimal (e.g. 123) or hexadecimal value (e.g. 0x123)') + return False # Archive decompilation errors. if self.args.ar_name and self.args.ar_index: Utils.print_error('Options --ar-name and --ar-index are mutually exclusive. Pick one.') + return False if self.args.mode != 'bin': if self.args.ar_name: @@ -397,8 +405,8 @@ def check_arguments(self): self.output = fname + '.out.' + self.args.hll # Convert to absolute paths. - self.input = os.path.abspath(self.args.input) #Utils.get_realpath(self.args.input) - self.output = os.path.abspath(self.output) #Utils.get_realpath(self.output) + self.input = os.path.abspath(self.args.input) # Utils.get_realpath(self.args.input) + self.output = os.path.abspath(self.output) # Utils.get_realpath(self.output) if self.args.pdb and os.path.exists(self.args.pdb): self.args.pdb = Utils.get_realpath(self.args.pdb) @@ -411,6 +419,7 @@ def check_arguments(self): Utils.print_error( 'Range %s in option --select-ranges is not a valid decimal (e.g. 123-456) or hexadecimal ' '(e.g. 0x123-0xabc) range.' % r) + return False # Check if first <= last. ranges = self.args.selected_ranges.split('-') @@ -419,10 +428,13 @@ def check_arguments(self): Utils.print_error( 'Range \'%s\' in option --select-ranges is not a valid range: ' 'second address must be greater or equal than the first one.' % ranges) + return False if self.args.arch: self.arch = self.args.arch + return True + def print_warning_if_decompiling_bytecode(self): """Prints a warning if we are decompiling bytecode.""" @@ -662,7 +674,8 @@ def string_to_md5(self, string): def decompile(self): global TIME # Check arguments and set default values for unset options. - self.check_arguments() + if not self.check_arguments(): + return # Initialize variables used by logging. if self.args.generate_log: @@ -979,6 +992,7 @@ def decompile(self): # Check whether the architecture was specified. if self.args.arch: + self.arch = self.args.arch subprocess.call([config.CONFIGTOOL, self.config, '--write', '--arch', self.args.arch], shell=True) else: # Get full name of the target architecture including comments in parentheses @@ -986,12 +1000,11 @@ def decompile(self): arch_full = arch_full.lower() # Strip comments in parentheses and all trailing whitespace - # todo (ARCH_FULL % (*) what is this - self.arch = arch_full # os.popen('echo ' + (ARCH_FULL % (*) + ' | sed -e s / ^ [[: space:]] * // \'').read().rstrip('\n') + self.arch = arch_full.strip() # Get object file format. format = os.popen(config.CONFIGTOOL + ' ' + self.config + ' --read --format').read().rstrip('\n') - format = format.lower() + format = format.lower().strip() # Intel HEX needs architecture to be specified if format == 'ihex': @@ -1020,16 +1033,19 @@ def decompile(self): if self.args.generate_log: self.generate_log() - self.cleanup() - Utils.print_error( - 'Unsupported target architecture %s. Supported architectures: Intel x86, ARM, ARM + Thumb, MIPS, PIC32, PowerPC.' % self.arch) - return + self.cleanup() + Utils.print_error( + 'Unsupported target architecture %s. Supported architectures: Intel x86, ARM, ARM + Thumb, MIPS, PIC32, PowerPC.' % self.arch) + return # Check file class (e.g. 'ELF32', 'ELF64'). At present, we can only decompile 32-bit files. # Note: we prefer to report the 'unsupported architecture' error (above) than this 'generic' error. 
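# --- Editor's illustration (not part of the patch) ---------------------------
# The hunks above replace hard exits with 'return False' after every reported
# error, and the caller checks the result. A compact, standalone sketch of that
# early-return validation pattern (option names are placeholders):
def check_arguments(opts):
    """Return True when the options are usable, False otherwise."""
    if opts.get('mode') == 'raw' and not opts.get('arch'):
        print('Option -a|--arch must be used with mode raw')
        return False
    if opts.get('ar_name') and opts.get('ar_index'):
        print('Options --ar-name and --ar-index are mutually exclusive. Pick one.')
        return False
    return True


def decompile(opts):
    if not check_arguments(opts):
        return 1  # propagate the failure instead of continuing half-configured
    return 0


if __name__ == '__main__':
    print(decompile({'mode': 'raw'}))                 # -> 1 (rejected)
    print(decompile({'mode': 'bin', 'arch': 'x86'}))  # -> 0 (accepted)
# -----------------------------------------------------------------------------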
- cmd = CmdRunner() - fileclass, _, _ = cmd.run_cmd([config.CONFIGTOOL, self.config, '--read', '--file-class']) - # FILECLASS = os.popen(config.CONFIGTOOL + ' ' + CONFIG + ' --read --file-class').read().rstrip('\n') + #fileclass = os.popen(config.CONFIGTOOL + ' ' + self.config + ' --read --file-class').read().rstrip('\n') + #fileclass = fileclass.strip() + + fileclass = '32' + + print(fileclass) if fileclass != '16' or fileclass != '32': if self.args.generate_log: @@ -1038,8 +1054,8 @@ def decompile(self): self.cleanup() Utils.print_error( 'Unsupported target format \'%s%s\'. Supported formats: ELF32, PE32, Intel HEX 32, Mach-O 32.' % ( - format, fileclass)) - return + format, fileclass)) + #return # Set path to statically linked code signatures. # @@ -1049,6 +1065,7 @@ def decompile(self): if sig_format == 'ihex' or sig_format == 'raw': sig_format = 'elf' + cmd = CmdRunner() endian_result, _, _ = cmd.run_cmd([config.CONFIGTOOL, self.config, '--read', '--endian']) # ENDIAN = os.popen(config.CONFIGTOOL + ' ' + CONFIG + ' --read --endian').read().rstrip('\n') @@ -1089,14 +1106,12 @@ def decompile(self): 'basename \'' + lib + '\' | LC_ALL=C sed -e \'s/[^A-Za-z0-9_.-]/_/g\'').read().rstrip('\n') sig_out = self.output + '.' + CROP_ARCH_PATH + '.' + lib_index + '.yara' - # if (subprocess.call(config.SIG_FROM_LIB + ' ' + lib + ' ' + '--output' + ' ' + SIG_OUT, shell=True, - # stderr=subprocess.STDOUT, stdout=subprocess.DEVNULL)): # Call sig from lib tool sig_from_lib = SigFromLib([lib, '--output ' + sig_out]) if sig_from_lib.run(): subprocess.call([config.CONFIGTOOL, self.config, '--write', '--user-signature', sig_out], shell=True) - self.signatures_to_remove = [sig_out] + self.signatures_to_remove.append(sig_out) else: Utils.print_warning('Failed extracting signatures from file \'' + lib + '\'') @@ -1107,8 +1122,9 @@ def decompile(self): subprocess.call([config.CONFIGTOOL, self.config, '--write', '--signatures', signatures_dir], shell=True) # User provided signatures. - for i in self.args.static_code_sigfile: - subprocess.call([config.CONFIGTOOL, self.config, '--write', '--user-signature', i], shell=True) + if self.args.static_code_sigfile: + for i in self.args.static_code_sigfile: + subprocess.call([config.CONFIGTOOL, self.config, '--write', '--user-signature', i], shell=True) # Store paths of type files into config for frontend. # TODO doesnt even exist in sh except here @@ -1120,7 +1136,7 @@ def decompile(self): subprocess.call([config.CONFIGTOOL, self.config, '--write', '--ords', ords_dir + '/'], shell=True) # Store paths to file with PDB debugging information into config for frontend. - if os.path.exists(self.args.pdb): + if self.args.pdb and os.path.exists(self.args.pdb): subprocess.call([config.CONFIGTOOL, self.config, '--write', '--pdb-file', self.args.pdb], shell=True) # Store file names of input and output into config for frontend. 
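# --- Editor's illustration (not part of the patch) ---------------------------
# The file-class test above, 'fileclass != "16" or fileclass != "32"', is always
# True (every value differs from at least one of the two literals), so the
# "unsupported format" branch is entered even for 32-bit inputs. A later patch
# in this series switches to a membership test, which is the usual fix:
def is_supported_file_class(fileclass):
    return fileclass in ('16', '32')


assert is_supported_file_class('32')
assert not is_supported_file_class('64')
# The original condition holds for every input, including the supported ones:
assert all((fc != '16' or fc != '32') for fc in ('16', '32', '64'))
# -----------------------------------------------------------------------------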
@@ -1218,9 +1234,8 @@ def thread2(): print(LOG_BIN2LLVMIR_OUTPUT, end='') """ else: - bin2llvmir_rc = subprocess.call( - [config.BIN2LLVMIR, ' '.join(bin2llvmir_params), '-o', self.out_backend_bc], - shell=True) + bin2llvmir_rc = subprocess.call([config.BIN2LLVMIR] + bin2llvmir_params + ['-o', self.out_backend_bc], + shell=True) if bin2llvmir_rc != 0: if self.args.generate_log: @@ -1228,8 +1243,9 @@ def thread2(): self.cleanup() Utils.print_error('Decompilation to LLVM IR failed') + return - self.check_whether_decompilation_should_be_forcefully_stopped('bin2llvmir') + self.check_whether_decompilation_should_be_forcefully_stopped('bin2llvmir') # modes 'bin' || 'raw' @@ -1360,8 +1376,9 @@ def thread4(): self.cleanup() Utils.print_error('Decompilation of file %s failed' % self.out_backend_bc) + return - self.check_whether_decompilation_should_be_forcefully_stopped('llvmir2hll') + self.check_whether_decompilation_should_be_forcefully_stopped('llvmir2hll') # Convert .dot graphs to desired format. if ((self.args.backend_emit_cg and self.args.backend_cg_conversion == 'auto') or ( diff --git a/scripts/retdec_signature_from_library_creator.py b/scripts/retdec_signature_from_library_creator.py index 5dc116694..7daa8cb89 100644 --- a/scripts/retdec_signature_from_library_creator.py +++ b/scripts/retdec_signature_from_library_creator.py @@ -14,7 +14,7 @@ from retdec_utils import Utils -def parse_args(): +def parse_args(_args): parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.ArgumentDefaultsHelpFormatter) @@ -53,12 +53,12 @@ def parse_args(): action='store_true', help='Stop after bin2pat.') - return parser.parse_args() + return parser.parse_args(_args) class SigFromLib: def __init__(self, _args): - self.args = _args + self.args = parse_args(_args) self.ignore_nop = '' self.file_path = '' self.tmp_dir_path = '' From 386556d57ebd6d5bed4416448f80f221552b07f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20K=C3=B6sel?= Date: Sat, 16 Jun 2018 09:54:14 +0200 Subject: [PATCH 11/48] [skip ci] Integrate @silverbacknet changes + some fixes --- scripts/retdec_config.py | 6 +- scripts/retdec_decompiler.py | 129 +++++++++++++++++------------------ scripts/retdec_utils.py | 10 +-- 3 files changed, 69 insertions(+), 76 deletions(-) diff --git a/scripts/retdec_config.py b/scripts/retdec_config.py index 7fe575d04..8adddc871 100644 --- a/scripts/retdec_config.py +++ b/scripts/retdec_config.py @@ -55,9 +55,9 @@ BIN2LLVMIR_PARAMS = ['-provider-init', '-decoder', '-verify', '-main-detection', '-idioms-libgcc', '-inst-opt', '-register', '-cond-branch-opt', '-syscalls', '-stack', '-constants', '-param-return', '-local-vars', '-inst-opt', '-simple-types', '-generate-dsm', '-remove-asm-instrs', '-class-hierarchy', '-select-fncs', '-unreachable-funcs', '-inst-opt', - '-value-protect'] + BIN2LLVMIR_LLVM_PASSES_ONLY + BIN2LLVMIR_LLVM_PASSES_ONLY + ['-simple-types', - '-stack-ptr-op-remove', '-inst-opt -idioms', '-global-to-local', '-dead-global-assign', '-instcombine', - '-phi2seq', '-value-protect'] + BIN2LLVMIR_PARAMS_DISABLES + '-value-protect', *BIN2LLVMIR_LLVM_PASSES_ONLY, *BIN2LLVMIR_LLVM_PASSES_ONLY, '-simple-types', + '-stack-ptr-op-remove', '-inst-opt', '-idioms', '-global-to-local', '-dead-global-assign', '-instcombine', + '-phi2seq', '-value-protect', *BIN2LLVMIR_PARAMS_DISABLES] # Paths to tools. 
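# --- Editor's illustration (not part of the patch) ---------------------------
# BIN2LLVMIR_PARAMS above now splices its sub-lists in with the * operator
# (PEP 448, Python 3.5+) instead of pasting space-separated strings, so every
# flag stays a separate argv element and no shell quoting is needed. A toy
# version with a shortened pass list:
LLVM_PASSES = ['-instcombine', '-simplifycfg']
DISABLES = ['-disable-inlining']  # placeholder name, not a real RetDec flag

PARAMS = ['-provider-init', *LLVM_PASSES, '-inst-opt', *DISABLES]

assert PARAMS == ['-provider-init', '-instcombine', '-simplifycfg',
                  '-inst-opt', '-disable-inlining']
# Passing PARAMS straight to subprocess as a list keeps flags such as '-loops'
# and '-loop-simplify' from being glued together into one argument.
# -----------------------------------------------------------------------------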
FILEINFO = os.path.join(INSTALL_BIN_DIR, 'retdec-fileinfo') diff --git a/scripts/retdec_decompiler.py b/scripts/retdec_decompiler.py index 922cfcd9c..fd42b4c7e 100755 --- a/scripts/retdec_decompiler.py +++ b/scripts/retdec_decompiler.py @@ -68,6 +68,7 @@ def parse_args(): parser.add_argument('-p', '--pdb', dest='pdb', metavar='FILE', + default='', help='File with PDB debug information.') parser.add_argument('--generate-log', @@ -201,7 +202,7 @@ def parse_args(): help='Removes temporary files created during the decompilation.') parser.add_argument('--color-for-ida', - dest='color_for_ida ', + dest='color_for_ida', help='Put IDA Pro color tags to output C file.') parser.add_argument('--config', @@ -259,10 +260,12 @@ def parse_args(): parser.add_argument('--static-code-sigfile', dest='static_code_sigfile', + default=[], help='Adds additional signature file for static code detection.') parser.add_argument('--static-code-archive', dest='static_code_archive', + default=[], help='Adds additional signature file for static code detection from given archive.') parser.add_argument('--no-default-static-signatures', @@ -673,6 +676,7 @@ def string_to_md5(self, string): def decompile(self): global TIME + cmd = CmdRunner() # Check arguments and set default values for unset options. if not self.check_arguments(): return @@ -814,8 +818,6 @@ def decompile(self): self.out_frontend_bc = out_frontend + '.bc' self.config = self.output + '.json' - print('Name is: ' + self.config) - if self.config != self.args.config: Utils.remove_file_forced(self.config) @@ -893,7 +895,8 @@ def decompile(self): """ pass else: - fileinfo_rc = subprocess.call([config.FILEINFO] + fileinfo_params, shell=True) + fileinfo, fileinfo_rc, _ = cmd.run_cmd([config.FILEINFO, *fileinfo_params]) + print(fileinfo) if fileinfo_rc != 0: if self.args.generate_log: @@ -978,7 +981,8 @@ def decompile(self): """ pass else: - fileinfo_rc = subprocess.call([config.FILEINFO] + fileinfo_params, shell=True) + fileinfo, fileinfo_rc, _ = cmd.run_cmd([config.FILEINFO, *fileinfo_params]) + print(fileinfo) if fileinfo_rc != 0: if self.args.generate_log: @@ -992,8 +996,7 @@ def decompile(self): # Check whether the architecture was specified. if self.args.arch: - self.arch = self.args.arch - subprocess.call([config.CONFIGTOOL, self.config, '--write', '--arch', self.args.arch], shell=True) + cmd.run_cmd([config.CONFIGTOOL, self.config, '--write', '--arch', self.args.arch]) else: # Get full name of the target architecture including comments in parentheses arch_full = os.popen(config.CONFIGTOOL + ' ' + self.config + ' --read --arch').read().rstrip('\n') @@ -1003,30 +1006,29 @@ def decompile(self): self.arch = arch_full.strip() # Get object file format. 
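# --- Editor's illustration (not part of the patch) ---------------------------
# The decompiler now reads tool output through a runner that returns
# (output, return_code, timed_out) instead of parsing os.popen() pipelines.
# A minimal stand-in built only on the standard library (the real CmdRunner in
# retdec_utils.py is more involved):
import subprocess
import sys


def run_cmd(cmd, timeout=None):
    try:
        p = subprocess.run(cmd, stdout=subprocess.PIPE,
                           stderr=subprocess.STDOUT, timeout=timeout)
        return p.stdout.decode('utf-8', errors='replace').rstrip(), p.returncode, False
    except subprocess.TimeoutExpired as e:
        out = e.stdout.decode('utf-8', errors='replace').rstrip() if e.stdout else ''
        return out, None, True


if __name__ == '__main__':
    output, rc, timed_out = run_cmd([sys.executable, '--version'])
    print(output, rc, timed_out)
# -----------------------------------------------------------------------------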
- format = os.popen(config.CONFIGTOOL + ' ' + self.config + ' --read --format').read().rstrip('\n') - format = format.lower().strip() + fileformat, _, _ = cmd.run_cmd([config.CONFIGTOOL, self.config, '--read', '--format']) # Intel HEX needs architecture to be specified - if format == 'ihex': + if fileformat in ['ihex']: if not self.arch or self.arch == 'unknown': - Utils.print_error('Option -a|--arch must be used with format ' + format) + Utils.print_error('Option -a|--arch must be used with format ' + fileformat) return if not self.args.endian: - Utils.print_error('Option -e|--endian must be used with format ' + format) + Utils.print_error('Option -e|--endian must be used with format ' + fileformat) return - subprocess.call([config.CONFIGTOOL, self.config, '--write', '--arch', self.arch], shell=True) - subprocess.call([config.CONFIGTOOL, self.config, '--write', '--bit-size', '32'], shell=True) - subprocess.call([config.CONFIGTOOL, self.config, '--write', '--file-class', '32'], shell=True) - subprocess.call([config.CONFIGTOOL, self.config, '--write', '--endian', self.args.endian], shell=True) + cmd.run_cmd([config.CONFIGTOOL, self.config, '--write', '--arch', self.arch]) + cmd.run_cmd([config.CONFIGTOOL, self.config, '--write', '--bit-size', '32']) + cmd.run_cmd([config.CONFIGTOOL, self.config, '--write', '--file-class', '32']) + cmd.run_cmd([config.CONFIGTOOL, self.config, '--write', '--endian', self.args.endian]) # Check whether the correct target architecture was specified. - if self.arch == 'arm' or self.arch == 'thumb': + if self.arch in ['arm', 'thumb']: ords_dir = config.ARM_ORDS_DIR - elif self.arch == 'x86': + elif self.arch in ['x86']: ords_dir = config.X86_ORDS_DIR - elif self.arch == 'powerpc' or self.arch == 'mips' or self.arch == 'pic32': + elif self.arch in ['powerpc', 'mips', 'pic32']: pass else: # nothing @@ -1040,14 +1042,9 @@ def decompile(self): # Check file class (e.g. 'ELF32', 'ELF64'). At present, we can only decompile 32-bit files. # Note: we prefer to report the 'unsupported architecture' error (above) than this 'generic' error. - #fileclass = os.popen(config.CONFIGTOOL + ' ' + self.config + ' --read --file-class').read().rstrip('\n') - #fileclass = fileclass.strip() - - fileclass = '32' - - print(fileclass) + fileclass, _, _ = cmd.run_cmd([config.CONFIGTOOL, self.config, '--read', '--file-class']) - if fileclass != '16' or fileclass != '32': + if fileclass not in ['16', '32']: if self.args.generate_log: self.generate_log() @@ -1055,17 +1052,15 @@ def decompile(self): Utils.print_error( 'Unsupported target format \'%s%s\'. Supported formats: ELF32, PE32, Intel HEX 32, Mach-O 32.' % ( format, fileclass)) - #return # Set path to statically linked code signatures. # # TODO: Using ELF for IHEX is ok, but for raw, we probably should somehow decide between ELF and PE, or use both, for RAW. 
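# --- Editor's illustration (not part of the patch) ---------------------------
# The architecture checks above now use membership tests such as
# "self.arch in ['arm', 'thumb']". An equivalent, slightly more declarative
# alternative is a lookup table; the directory paths below are placeholders,
# not the real retdec_config values:
ORDS_DIRS = {
    'arm': '/opt/retdec/support/ords/arm',
    'thumb': '/opt/retdec/support/ords/arm',
    'x86': '/opt/retdec/support/ords/x86',
}
SUPPORTED_WITHOUT_ORDS = {'powerpc', 'mips', 'pic32'}


def ords_dir_for(arch):
    """Return the ORD directory for arch, '' if none is needed, None if unsupported."""
    if arch in ORDS_DIRS:
        return ORDS_DIRS[arch]
    if arch in SUPPORTED_WITHOUT_ORDS:
        return ''
    return None


assert ords_dir_for('thumb').endswith('arm')
assert ords_dir_for('mips') == ''
assert ords_dir_for('sparc') is None
# -----------------------------------------------------------------------------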
- sig_format = format + sig_format = fileformat - if sig_format == 'ihex' or sig_format == 'raw': + if sig_format in ['ihex', 'raw']: sig_format = 'elf' - cmd = CmdRunner() endian_result, _, _ = cmd.run_cmd([config.CONFIGTOOL, self.config, '--read', '--endian']) # ENDIAN = os.popen(config.CONFIGTOOL + ' ' + CONFIG + ' --read --endian').read().rstrip('\n') @@ -1074,7 +1069,11 @@ def decompile(self): elif endian_result == 'big': sig_endian = 'be' else: - sig_endian = '' + if self.args.generate_log: + self.generate_log() + self.cleanup() + Utils.print_error('Cannot determine endiannesss.') + return sig_arch = self.arch @@ -1089,8 +1088,7 @@ def decompile(self): # Decompile unreachable functions. if self.args.keep_unreachable_funcs: - subprocess.call([config.CONFIGTOOL, self.config, '--write', '--keep-unreachable-funcs', 'true'], - shell=True) + cmd.run_cmd([config.CONFIGTOOL, self.config, '--write', '--keep-unreachable-funcs', 'true']) if self.args.static_code_archive is not None: # Get signatures from selected archives. @@ -1102,15 +1100,14 @@ def decompile(self): for lib in self.args.static_code_archive: print('Extracting signatures from file \'%s\'', lib) - CROP_ARCH_PATH = os.popen( - 'basename \'' + lib + '\' | LC_ALL=C sed -e \'s/[^A-Za-z0-9_.-]/_/g\'').read().rstrip('\n') + CROP_ARCH_PATH, _, _ = cmd.run_cmd( + 'basename \'' + lib + '\' | LC_ALL=C sed -e \'s/[^A-Za-z0-9_.-]/_/g\'') sig_out = self.output + '.' + CROP_ARCH_PATH + '.' + lib_index + '.yara' # Call sig from lib tool sig_from_lib = SigFromLib([lib, '--output ' + sig_out]) if sig_from_lib.run(): - subprocess.call([config.CONFIGTOOL, self.config, '--write', '--user-signature', sig_out], - shell=True) + cmd.run_cmd([config.CONFIGTOOL, self.config, '--write', '--user-signature', sig_out]) self.signatures_to_remove.append(sig_out) else: Utils.print_warning('Failed extracting signatures from file \'' + lib + '\'') @@ -1119,13 +1116,12 @@ def decompile(self): # Store paths of signature files into config for frontend. if not self.args.no_default_static_signatures: - subprocess.call([config.CONFIGTOOL, self.config, '--write', '--signatures', signatures_dir], shell=True) + cmd.run_cmd([config.CONFIGTOOL, self.config, '--write', '--signatures', signatures_dir]) # User provided signatures. if self.args.static_code_sigfile: for i in self.args.static_code_sigfile: - subprocess.call([config.CONFIGTOOL, self.config, '--write', '--user-signature', i], shell=True) - + cmd.run_cmd([config.CONFIGTOOL, self.config, '--write', '--user-signature', i]) # Store paths of type files into config for frontend. # TODO doesnt even exist in sh except here # if os.path.isdir(GENERIC_TYPES_DIR): @@ -1133,34 +1129,31 @@ def decompile(self): # Store path of directory with ORD files into config for frontend (note: only directory, not files themselves). if os.path.isdir(ords_dir): - subprocess.call([config.CONFIGTOOL, self.config, '--write', '--ords', ords_dir + '/'], shell=True) + cmd.run_cmd([config.CONFIGTOOL, self.config, '--write', '--ords', ords_dir + '/']) # Store paths to file with PDB debugging information into config for frontend. if self.args.pdb and os.path.exists(self.args.pdb): - subprocess.call([config.CONFIGTOOL, self.config, '--write', '--pdb-file', self.args.pdb], shell=True) + cmd.run_cmd([config.CONFIGTOOL, self.config, '--write', '--pdb-file', self.args.pdb]) # Store file names of input and output into config for frontend. 
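# --- Editor's illustration (not part of the patch) ---------------------------
# The signature-extraction loop above still builds the sanitized archive name
# with a "basename ... | LC_ALL=C sed ..." pipeline handed to the runner as a
# single string (a later hunk marks it TODO). The same result is a one-liner in
# pure Python, with no shell involved:
import os
import re


def crop_arch_path(lib):
    """Basename of 'lib' with everything outside [A-Za-z0-9_.-] replaced by '_'."""
    return re.sub(r'[^A-Za-z0-9_.-]', '_', os.path.basename(lib))


assert crop_arch_path('/tmp/lib foo.a') == 'lib_foo.a'
# -----------------------------------------------------------------------------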
- subprocess.call([config.CONFIGTOOL, self.config, '--write', '--input-file', self.input], shell=True) - subprocess.call([config.CONFIGTOOL, self.config, '--write', '--frontend-output-file', self.out_frontend_ll], - shell=True) - subprocess.call([config.CONFIGTOOL, self.config, '--write', '--output-file', self.output], shell=True) + cmd.run_cmd([config.CONFIGTOOL, self.config, '--write', '--input-file', self.input]) + cmd.run_cmd([config.CONFIGTOOL, self.config, '--write', '--frontend-output-file', self.out_frontend_ll]) + cmd.run_cmd([config.CONFIGTOOL, self.config, '--write', '--output-file', self.output]) # Store decode only selected parts flag. if self.args.selected_decode_only: - subprocess.call([config.CONFIGTOOL, self.config, '--write', '--decode-only-selected', 'true'], - shell=True) + cmd.run_cmd([config.CONFIGTOOL, self.config, '--write', '--decode-only-selected', 'true']) else: - subprocess.call([config.CONFIGTOOL, self.config, '--write', '--decode-only-selected', 'false'], - shell=True) + cmd.run_cmd([config.CONFIGTOOL, self.config, '--write', '--decode-only-selected', 'false']) # Store selected functions or selected ranges into config for frontend. if self.args.selected_functions: for f in self.args.selected_functions: - subprocess.call([config.CONFIGTOOL, self.config, '--write', '--selected-func', f], shell=True) + cmd.run_cmd([config.CONFIGTOOL, self.config, '--write', '--selected-func', f]) if self.args.selected_ranges: for r in self.args.selected_ranges: - subprocess.call([config.CONFIGTOOL, self.config, '--write', '--selected-range', r], shell=True) + cmd.run_cmd([config.CONFIGTOOL, self.config, '--write', '--selected-range', r]) # Assignment of other used variables. # We have to ensure that the .bc version of the decompiled .ll file is placed @@ -1234,8 +1227,9 @@ def thread2(): print(LOG_BIN2LLVMIR_OUTPUT, end='') """ else: - bin2llvmir_rc = subprocess.call([config.BIN2LLVMIR] + bin2llvmir_params + ['-o', self.out_backend_bc], - shell=True) + bin22llvmir_out, bin2llvmir_rc, _ = cmd.run_cmd( + [config.BIN2LLVMIR, *bin2llvmir_params, '-o', self.out_backend_bc]) + print(bin22llvmir_out) if bin2llvmir_rc != 0: if self.args.generate_log: @@ -1255,12 +1249,12 @@ def thread2(): self.config = self.args.config # Create parameters for the $LLVMIR2HLL call. 
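# --- Editor's illustration (not part of the patch) ---------------------------
# Each configuration value above is written with a separate
# "<configtool> <config> --write --<key> <value>" call. A small hypothetical
# helper keeps those call sites short; the tool path in the usage comment is a
# placeholder:
import subprocess


def config_write(configtool, config_path, **options):
    """Write each keyword argument as '--key value'; return True if all calls succeed."""
    ok = True
    for key, value in options.items():
        cmd = [configtool, config_path, '--write',
               '--' + key.replace('_', '-'), str(value)]
        ok = (subprocess.call(cmd) == 0) and ok
    return ok


# Usage sketch:
# config_write('/opt/retdec/bin/retdec-config', 'out.json',
#              arch='x86', bit_size=32, endian='little')
# -----------------------------------------------------------------------------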
- llvmir2hll_params = ['-target-hll=' + self.args.hll, '-var-renamer=' + ( - self.args.backend_var_renamer), '-var-name-gen=fruit', '-var-name-gen-prefix=', '-call-info-obtainer=' + ( - self.args.backend_call_info_obtainer), '-arithm-expr-evaluator=' + ( - self.args.backend_arithm_expr_evaluator), '-validate-module', - '-llvmir2bir-converter=' + ( - self.args.backend_llvmir2bir_converter), '-o', self.output, self.out_backend_bc] + llvmir2hll_params = ['-target-hll=' + self.args.hll, '-var-renamer=' + self.args.backend_var_renamer, + '-var-name-gen=fruit', '-var-name-gen-prefix=', + '-call-info-obtainer=' + self.args.backend_call_info_obtainer, + '-arithm-expr-evaluator=' + self.args.backend_arithm_expr_evaluator, '-validate-module', + '-llvmir2bir-converter=' + self.args.backend_llvmir2bir_converter, '-o', self.output, + self.out_backend_bc] if self.args.backend_no_debug: llvmir2hll_params.append('-enable-debug') @@ -1368,7 +1362,8 @@ def thread4(): time.sleep(0.1) """ else: - llvmir2hll_rc = subprocess.call([config.LLVMIR2HLL] + llvmir2hll_params, shell=True) + llvmir2hll_out, llvmir2hll_rc, _ = cmd.run_cmd([config.LLVMIR2HLL, *llvmir2hll_params]) + print(llvmir2hll_out) if llvmir2hll_rc != 0: if self.args.generate_log: @@ -1390,16 +1385,16 @@ def thread4(): print( 'RUN: dot -T' + self.args.graph_format + ' ' + self.output + '.cg.dot > ' + self.output + '.cg.' + self.args.graph_format) - subprocess.call(['dot', '-T' + self.args.graph_format, self.output + '.cg.dot'], shell=True, - stdout=open(self.output + '.cg.' + self.args.graph_format, 'wb')) + cmd.run_cmd(['dot', '-T' + self.args.graph_format, self.output + '.cg.dot'], + stdout=open(self.output + '.cg.' + self.args.graph_format, 'wb')) if self.args.backend_emit_cfg and self.args.backend_cfg_conversion == 'auto': for cfg in glob.glob(self.output + '.cfg.*.dot'): print('RUN: dot -T' + self.args.graph_format + ' ' + cfg + ' > ' + ( os.path.splitext(cfg)[0] + '.' + self.args.graph_format)) - subprocess.call(['dot', '-T' + self.args.graph_format, cfg], shell=True, - stdout=open((os.path.splitext(cfg)[0]) + '.' + self.args.graph_format, 'wb')) + cmd.run_cmd(['dot', '-T' + self.args.graph_format, cfg], + stdout=open((os.path.splitext(cfg)[0]) + '.' + self.args.graph_format, 'wb')) # Remove trailing whitespace and the last redundant empty new line from the # generated output (if any). It is difficult to do this in the back-end, so we @@ -1413,11 +1408,9 @@ def thread4(): with open(self.output, 'w') as fh: [fh.write('%s\n' % line) for line in new] - # shutil.move(self.output + '.tmp', self.output) - # Colorize output file. if self.args.color_for_ida: - subprocess.call([config.IDA_COLORIZER, self.output, self.config], shell=True) + cmd.run_cmd([config.IDA_COLORIZER, self.output, self.config]) # Store the information about the decompilation into the JSON file. if self.args.generate_log: diff --git a/scripts/retdec_utils.py b/scripts/retdec_utils.py index 905072d9f..19dd41860 100644 --- a/scripts/retdec_utils.py +++ b/scripts/retdec_utils.py @@ -21,7 +21,7 @@ class CmdRunner: """A runner of external commands.""" def run_cmd(self, cmd, input=b'', timeout=None, input_encoding='utf-8', - output_encoding='utf-8', strip_shell_colors=True): + output_encoding='utf-8', strip_shell_colors=True, stdout=subprocess.STDOUT): """Runs the given command (synchronously). :param list cmd: Command to be run as a list of arguments (strings). 
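# --- Editor's illustration (not part of the patch) ---------------------------
# The .dot-to-image conversion above passes an open file object as stdout
# instead of relying on shell redirection. A minimal sketch (requires the
# Graphviz 'dot' binary on PATH; the file names in the usage comment are
# placeholders):
import subprocess


def convert_dot(dot_file, image_file, fmt='png'):
    """Render dot_file into image_file; return the dot exit code."""
    with open(image_file, 'wb') as out:
        return subprocess.call(['dot', '-T' + fmt, dot_file], stdout=out)


# Usage sketch:
# convert_dot('prog.c.cg.dot', 'prog.c.cg.png')
# -----------------------------------------------------------------------------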
@@ -74,15 +74,15 @@ def decode(output): try: p = self.start(cmd) output, _ = p.communicate(input, timeout) - return decode(output), p.returncode, False + return decode(output).rstrip(), p.returncode, False except subprocess.TimeoutExpired: # Kill the process, along with all its child processes. p.kill() # Finish the communication to obtain the output. output, _ = p.communicate() - return decode(output), p.returncode, True + return decode(output).rstrip(), p.returncode, True - def start(self, cmd, discard_output=False): + def start(self, cmd, discard_output=False, stdout=subprocess.STDOUT): """Starts the given command and returns a handler to it. :param list cmd: Command to be run as a list of arguments (strings). @@ -100,7 +100,7 @@ def start(self, cmd, discard_output=False): args=cmd, stdin=subprocess.PIPE, stdout=subprocess.DEVNULL if discard_output else subprocess.PIPE, - stderr=subprocess.DEVNULL if discard_output else subprocess.STDOUT + stderr=subprocess.DEVNULL if discard_output else stdout ) if Utils.is_windows(): return _WindowsProcess(**kwargs) From f10e7e398adcd15dc1a7e4b55bf5ea0fa96a07f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20K=C3=B6sel?= Date: Sat, 16 Jun 2018 11:27:31 +0200 Subject: [PATCH 12/48] [skip ci] Use CmdRunner.run_cmd everywhere in retdec_decompiler Small fixes and cleanup Early out if an error occurs --- scripts/retdec_decompiler.py | 134 +++++++++++++++++++++-------------- 1 file changed, 81 insertions(+), 53 deletions(-) diff --git a/scripts/retdec_decompiler.py b/scripts/retdec_decompiler.py index fd42b4c7e..3714ae64d 100755 --- a/scripts/retdec_decompiler.py +++ b/scripts/retdec_decompiler.py @@ -8,7 +8,6 @@ import os import re import shutil -import subprocess import sys import time @@ -164,22 +163,27 @@ def parse_args(): parser.add_argument('--backend-no-debug-comments', dest='backend_no_debug_comments', + action='store_true', help='Disables the emission of debug comments in the generated code.') parser.add_argument('--backend-no-opts', dest='backend_no_opts', + action='store_true', help='Disables backend optimizations.') parser.add_argument('--backend-no-symbolic-names', dest='backend_no_symbolic_names', + action='store_true', help='Disables the conversion of constant arguments to their symbolic names.') parser.add_argument('--backend-no-time-varying-info', dest='backend_no_time_varying_info', + action='store_true', help='Do not emit time-varying information, like dates.') parser.add_argument('--backend-no-var-renaming', dest='backend_no_var_renaming', + action='store_true', help='Disables renaming of variables in the backend.') parser.add_argument('--backend-semantics', @@ -188,6 +192,7 @@ def parse_args(): parser.add_argument('--backend-strict-fpu-semantics', dest='backend_strict_fpu_semantics', + action='store_true', help='Disables backend optimizations.') parser.add_argument('--backend-var-renamer', @@ -199,10 +204,12 @@ def parse_args(): parser.add_argument('--cleanup', dest='cleanup', + action='store_true', help='Removes temporary files created during the decompilation.') parser.add_argument('--color-for-ida', dest='color_for_ida', + action='store_true', help='Put IDA Pro color tags to output C file.') parser.add_argument('--config', @@ -211,14 +218,17 @@ def parse_args(): parser.add_argument('--no-config', dest='no_config', + action='store_true', help='State explicitly that config file is not to be used.') parser.add_argument('--fileinfo-verbose', dest='fileinfo_verbose', + action='store_true', help='Print all detected information about 
input file.') parser.add_argument('--fileinfo-use-all-external-patterns', dest='fileinfo_use_all_external_patterns', + action='store_true', help='Use all detection rules from external YARA databases.') parser.add_argument('--graph-format', @@ -240,6 +250,7 @@ def parse_args(): parser.add_argument('--select-decode-only', dest='selected_decode_only', + action='store_true', help='Decode only selected parts (functions/ranges). Faster decompilation, but worse results.') parser.add_argument('--select-functions', @@ -270,6 +281,7 @@ def parse_args(): parser.add_argument('--no-default-static-signatures', dest='no_default_static_signatures', + action='store_true', help='No default signatures for statically linked code analysis are loaded ' '(options static-code-sigfile/archive are still available).') @@ -280,6 +292,7 @@ def parse_args(): parser.add_argument('--no-memory-limit', dest='no_memory_limit', + action='store_true', help='Disables the default memory limit (half of system RAM) of fileinfo, ' 'unpacker, bin2llvmir, and llvmir2hll.') @@ -469,6 +482,8 @@ def check_whether_decompilation_should_be_forcefully_stopped(self, tool_name): print() print('#### Forced stop due to - -stop - after %s...' % self.args.stop_after) # sys.exit(0) + return True + return False def cleanup(self): """Cleanup working directory""" @@ -679,7 +694,7 @@ def decompile(self): cmd = CmdRunner() # Check arguments and set default values for unset options. if not self.check_arguments(): - return + return 1 # Initialize variables used by logging. if self.args.generate_log: @@ -713,21 +728,23 @@ def decompile(self): print('##### Restoring static library with architecture family ' + self.args.arch + '...') print( 'RUN: ' + config.EXTRACT + ' --family ' + self.args.arch + ' --out ' + out_archive + ' ' + self.input) - if (not subprocess.call( - [config.EXTRACT, '--family', self.args.arch, '--out', out_archive, self.input], - shell=True)): + + _, extract_rc, _ = cmd.run_cmd( + [config.EXTRACT, '--family', self.args.arch, '--out', out_archive, self.input]) + if not extract_rc: # Architecture not supported print('Invalid --arch option \'' + self.args.arch + '\'. File contains these architecture families:') - subprocess.call([config.EXTRACT, '--list', self.input], shell=True) + cmd.run_cmd([config.EXTRACT, '--list', self.input]) self.cleanup() - sys.exit(1) + # sys.exit(1) + return 1 else: # Pick best architecture print() print('##### Restoring best static library for decompilation...') print('RUN: ' + config.EXTRACT + ' --best --out ' + out_archive + ' ' + self.input) - subprocess.call([config.EXTRACT, '--best', '--out', out_archive, self.input], shell=True) + cmd.run_cmd([config.EXTRACT, '--best', '--out', out_archive, self.input]) self.input = out_archive @@ -742,20 +759,20 @@ def decompile(self): if Utils.has_thin_archive_signature(self.input): self.cleanup() Utils.print_error('File is a thin archive and cannot be decompiled.') - return + return 1 # Check if our tools can handle it. if not Utils.is_valid_archive(self.input): self.cleanup() Utils.print_error('The input archive has invalid format.') - return + return 1 # Get and check number of objects. arch_object_count = Utils.archive_object_count(self.input) if arch_object_count <= 0: self.cleanup() Utils.print_error('The input archive is empty.') - return + return 1 # Prepare object output path. out_restored = self.output + '.restored' @@ -775,11 +792,11 @@ def decompile(self): Utils.print_error('File on index \'' + self.args.ar_index + '\' was not found in the input archive. 
Valid indexes are 0-' + ( valid_index) + '.') - return + return 1 else: Utils.print_error('File on index \'' + self.args.ar_index + '\' was not found in the input archive. The only valid index is 0.') - return + return 1 self.input = out_restored elif self.args.ar_name: @@ -791,6 +808,8 @@ def decompile(self): if not Utils.archive_get_by_name(self.input, self.args.ar_name, out_restored): self.cleanup() Utils.print_error('File named %s was not found in the input archive.' % self.args.ar_name) + return 1 + self.input = out_restored else: # Print list of files. @@ -799,7 +818,7 @@ def decompile(self): Utils.archive_list_numbered_content(self.input) self.cleanup() - sys.exit(1) + return 1 else: if self.args.ar_name: Utils.print_warning('Option --ar-name can be used only with archives.') @@ -826,35 +845,32 @@ def decompile(self): # Preprocess existing file or create a new, empty JSON file. if os.path.isfile(self.config): - subprocess.call([config.CONFIGTOOL, self.config, '--preprocess'], shell=True) + cmd.run_cmd([config.CONFIGTOOL, self.config, '--preprocess']) else: with open(self.config, 'w') as f: f.write('{}') # Raw data needs architecture, endianess and optionaly sections's vma and entry point to be specified. if self.args.mode == 'raw': - if not self.args.arch or self.args.arch == 'unknown' or self.args.arch == '': + if not self.arch or self.arch == 'unknown' or self.arch == '': Utils.print_error('Option -a|--arch must be used with mode ' + self.args.mode) - return + return 1 if not self.args.endian: Utils.print_error('Option -e|--endian must be used with mode ' + self.args.mode) - return + return 1 - subprocess.call([config.CONFIGTOOL, self.config, '--write', '--format', 'raw'], shell=True) - subprocess.call([config.CONFIGTOOL, self.config, '--write', '--arch', self.args.arch], shell=True) - subprocess.call([config.CONFIGTOOL, self.config, '--write', '--bit-size', '32'], shell=True) - subprocess.call([config.CONFIGTOOL, self.config, '--write', '--file-class', '32'], shell=True) - subprocess.call([config.CONFIGTOOL, self.config, '--write', '--endian', self.args.endian], shell=True) + cmd.run_cmd([config.CONFIGTOOL, self.config, '--write', '--format', 'raw']) + cmd.run_cmd([config.CONFIGTOOL, self.config, '--write', '--arch', self.arch]) + cmd.run_cmd([config.CONFIGTOOL, self.config, '--write', '--bit-size', '32']) + cmd.run_cmd([config.CONFIGTOOL, self.config, '--write', '--file-class', '32']) + cmd.run_cmd([config.CONFIGTOOL, self.config, '--write', '--endian', self.args.endian]) if self.args.raw_entry_point: - subprocess.call( - [config.CONFIGTOOL, self.config, '--write', '--entry-point', self.args.raw_entry_point], - shell=True) + cmd.run_cmd([config.CONFIGTOOL, self.config, '--write', '--entry-point', self.args.raw_entry_point]) if self.args.raw_section_vma: - subprocess.call([config.CONFIGTOOL, self.config, '--write', '--section-vma', - self.args.raw_section_vma], shell=True) + cmd.run_cmd([config.CONFIGTOOL, self.config, '--write', '--section-vma', self.args.raw_section_vma]) # # Call fileinfo to create an initial config file. @@ -905,8 +921,10 @@ def decompile(self): self.cleanup() # The error message has been already reported by fileinfo in stderr. Utils.print_error('') + return 1 - self.check_whether_decompilation_should_be_forcefully_stopped('fileinfo') + if self.check_whether_decompilation_should_be_forcefully_stopped('fileinfo'): + return 0 ## ## Unpacking. 
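# --- Editor's illustration (not part of the patch) ---------------------------
# The fileinfo invocation assembles its argument list incrementally with
# list.extend(), so optional flags appear only when requested. A compact sketch
# using flags that occur elsewhere in this patch series (paths are placeholders):
def build_fileinfo_params(config_path, input_file, crypto_dbs=(), max_memory=None):
    params = ['-c', config_path, '--similarity', input_file, '--no-hashes=all']
    for db in crypto_dbs:
        params.extend(['--crypto', db])
    if max_memory:
        params.extend(['--max-memory', str(max_memory)])
    return params


print(build_fileinfo_params('out.json', 'sample.exe',
                            ['signatures/crypto.yara'], max_memory=2048))
# -----------------------------------------------------------------------------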
@@ -928,14 +946,17 @@ def decompile(self): else: _, unpacker_rc = unpacker.unpack_all() - self.check_whether_decompilation_should_be_forcefully_stopped('unpacker') + if self.check_whether_decompilation_should_be_forcefully_stopped('unpacker'): + return 0 # RET_UNPACK_OK=0 # RET_UNPACKER_NOTHING_TO_DO_OTHERS_OK=1 # RET_NOTHING_TO_DO=2 # RET_UNPACKER_FAILED_OTHERS_OK=3 # RET_UNPACKER_FAILED=4 - if unpacker_rc == 0 or unpacker_rc == 1 or unpacker_rc == 3: + if unpacker_rc == Unpacker.RET_UNPACK_OK or unpacker_rc == Unpacker.RET_UNPACKER_NOTHING_TO_DO_OTHERS_OK \ + or unpacker_rc == Unpacker.RET_UNPACKER_FAILED_OTHERS_OK: + # Successfully unpacked -> re-run fileinfo to obtain fresh information. self.input = self.out_unpacked fileinfo_params = ['-c', self.config, '--similarity', self.input, '--no-hashes=all'] @@ -991,15 +1012,16 @@ def decompile(self): self.cleanup() # The error message has been already reported by fileinfo in stderr. Utils.print_error('') + return 1 self.print_warning_if_decompiling_bytecode() # Check whether the architecture was specified. - if self.args.arch: - cmd.run_cmd([config.CONFIGTOOL, self.config, '--write', '--arch', self.args.arch]) + if self.arch: + cmd.run_cmd([config.CONFIGTOOL, self.config, '--write', '--arch', self.arch]) else: # Get full name of the target architecture including comments in parentheses - arch_full = os.popen(config.CONFIGTOOL + ' ' + self.config + ' --read --arch').read().rstrip('\n') + arch_full, _, _ = cmd.run_cmd([config.CONFIGTOOL, self.config, '--read', '--arch']) arch_full = arch_full.lower() # Strip comments in parentheses and all trailing whitespace @@ -1012,11 +1034,11 @@ def decompile(self): if fileformat in ['ihex']: if not self.arch or self.arch == 'unknown': Utils.print_error('Option -a|--arch must be used with format ' + fileformat) - return + return 1 if not self.args.endian: Utils.print_error('Option -e|--endian must be used with format ' + fileformat) - return + return 1 cmd.run_cmd([config.CONFIGTOOL, self.config, '--write', '--arch', self.arch]) cmd.run_cmd([config.CONFIGTOOL, self.config, '--write', '--bit-size', '32']) @@ -1037,8 +1059,9 @@ def decompile(self): self.cleanup() Utils.print_error( - 'Unsupported target architecture %s. Supported architectures: Intel x86, ARM, ARM + Thumb, MIPS, PIC32, PowerPC.' % self.arch) - return + 'Unsupported target architecture %s. Supported architectures: Intel x86, ARM, ARM + Thumb, MIPS, PIC32, PowerPC.' + % self.arch) + return 1 # Check file class (e.g. 'ELF32', 'ELF64'). At present, we can only decompile 32-bit files. # Note: we prefer to report the 'unsupported architecture' error (above) than this 'generic' error. @@ -1052,6 +1075,7 @@ def decompile(self): Utils.print_error( 'Unsupported target format \'%s%s\'. Supported formats: ELF32, PE32, Intel HEX 32, Mach-O 32.' % ( format, fileclass)) + return 1 # Set path to statically linked code signatures. 
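# --- Editor's illustration (not part of the patch) ---------------------------
# The unpacker result above is now compared against named constants instead of
# bare 0/1/3. A toy class mirroring the values listed in the comment, with the
# success test pulled out into a function:
class Unpacker:
    RET_UNPACK_OK = 0
    RET_UNPACKER_NOTHING_TO_DO_OTHERS_OK = 1
    RET_NOTHING_TO_DO = 2
    RET_UNPACKER_FAILED_OTHERS_OK = 3
    RET_UNPACKER_FAILED = 4


def preprocessing_produced_usable_file(rc):
    """True when fileinfo should be re-run on the (possibly) unpacked file."""
    return rc in (Unpacker.RET_UNPACK_OK,
                  Unpacker.RET_UNPACKER_NOTHING_TO_DO_OTHERS_OK,
                  Unpacker.RET_UNPACKER_FAILED_OTHERS_OK)


assert preprocessing_produced_usable_file(0)
assert preprocessing_produced_usable_file(Unpacker.RET_UNPACKER_FAILED_OTHERS_OK)
assert not preprocessing_produced_usable_file(Unpacker.RET_NOTHING_TO_DO)
# -----------------------------------------------------------------------------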
# @@ -1062,7 +1086,6 @@ def decompile(self): sig_format = 'elf' endian_result, _, _ = cmd.run_cmd([config.CONFIGTOOL, self.config, '--read', '--endian']) - # ENDIAN = os.popen(config.CONFIGTOOL + ' ' + CONFIG + ' --read --endian').read().rstrip('\n') if endian_result == 'little': sig_endian = 'le' @@ -1071,9 +1094,10 @@ def decompile(self): else: if self.args.generate_log: self.generate_log() + self.cleanup() Utils.print_error('Cannot determine endiannesss.') - return + return 1 sig_arch = self.arch @@ -1081,8 +1105,6 @@ def decompile(self): sig_arch = 'mips' signatures_dir = os.path.join(config.GENERIC_SIGNATURES_DIR, sig_format, fileclass, sig_endian, sig_arch) - # SIGNATURES_DIR = config.GENERIC_SIGNATURES_DIR + '/' + SIG_FORMAT + '/' + FILECLASS + '/' + SIG_ENDIAN + '/' + ( - # SIG_ARCH) self.print_warning_if_decompiling_bytecode() @@ -1100,12 +1122,13 @@ def decompile(self): for lib in self.args.static_code_archive: print('Extracting signatures from file \'%s\'', lib) - CROP_ARCH_PATH, _, _ = cmd.run_cmd( + # TODO replace command + crop_arch_path, _, _ = cmd.run_cmd( 'basename \'' + lib + '\' | LC_ALL=C sed -e \'s/[^A-Za-z0-9_.-]/_/g\'') - sig_out = self.output + '.' + CROP_ARCH_PATH + '.' + lib_index + '.yara' + sig_out = self.output + '.' + crop_arch_path + '.' + lib_index + '.yara' # Call sig from lib tool - sig_from_lib = SigFromLib([lib, '--output ' + sig_out]) + sig_from_lib = SigFromLib([lib, '--output', sig_out]) if sig_from_lib.run(): cmd.run_cmd([config.CONFIGTOOL, self.config, '--write', '--user-signature', sig_out]) self.signatures_to_remove.append(sig_out) @@ -1122,6 +1145,7 @@ def decompile(self): if self.args.static_code_sigfile: for i in self.args.static_code_sigfile: cmd.run_cmd([config.CONFIGTOOL, self.config, '--write', '--user-signature', i]) + # Store paths of type files into config for frontend. # TODO doesnt even exist in sh except here # if os.path.isdir(GENERIC_TYPES_DIR): @@ -1129,7 +1153,7 @@ def decompile(self): # Store path of directory with ORD files into config for frontend (note: only directory, not files themselves). if os.path.isdir(ords_dir): - cmd.run_cmd([config.CONFIGTOOL, self.config, '--write', '--ords', ords_dir + '/']) + cmd.run_cmd([config.CONFIGTOOL, self.config, '--write', '--ords', ords_dir + os.path.sep]) # Store paths to file with PDB debugging information into config for frontend. if self.args.pdb and os.path.exists(self.args.pdb): @@ -1171,9 +1195,9 @@ def decompile(self): self.out_backend_bc = out_backend + '.bc' self.out_backend_ll = out_backend + '.ll' - ## - ## Decompile the binary into LLVM IR. - ## + # + # Decompile the binary into LLVM IR. + # bin2llvmir_params = config.BIN2LLVMIR_PARAMS if self.args.keep_unreachable_funcs: @@ -1239,7 +1263,8 @@ def thread2(): Utils.print_error('Decompilation to LLVM IR failed') return - self.check_whether_decompilation_should_be_forcefully_stopped('bin2llvmir') + if self.check_whether_decompilation_should_be_forcefully_stopped('bin2llvmir'): + return 0 # modes 'bin' || 'raw' @@ -1371,9 +1396,10 @@ def thread4(): self.cleanup() Utils.print_error('Decompilation of file %s failed' % self.out_backend_bc) - return + return 1 - self.check_whether_decompilation_should_be_forcefully_stopped('llvmir2hll') + if self.check_whether_decompilation_should_be_forcefully_stopped('llvmir2hll'): + return 0 # Convert .dot graphs to desired format. 
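# --- Editor's illustration (not part of the patch) ---------------------------
# check_whether_decompilation_should_be_forcefully_stopped() now reports back
# to the caller (True = stop requested via --stop-after) and decompile()
# returns 0 for that clean early exit. A stripped-down sketch of the handshake,
# using the tool names accepted by --stop-after:
def should_stop_after(stop_after, tool_name):
    if stop_after == tool_name:
        print('#### Forced stop due to --stop-after %s...' % stop_after)
        return True
    return False


def decompile(stop_after=None):
    for tool in ('fileinfo', 'unpacker', 'bin2llvmir', 'llvmir2hll'):
        print('running %s' % tool)  # stand-in for the real tool invocation
        if should_stop_after(stop_after, tool):
            return 0                # clean, requested stop
    return 0


if __name__ == '__main__':
    decompile('bin2llvmir')  # stops before llvmir2hll would run
# -----------------------------------------------------------------------------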
if ((self.args.backend_emit_cg and self.args.backend_cg_conversion == 'auto') or ( @@ -1815,9 +1841,11 @@ def thread4(): break """ + return 0 + if __name__ == '__main__': args = parse_args() decompiler = Decompiler(args) - decompiler.decompile() + sys.exit(decompiler.decompile()) From 5248482f83fa4512fa9a7c39817fb7a20aae2855 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20K=C3=B6sel?= Date: Sat, 16 Jun 2018 17:47:28 +0200 Subject: [PATCH 13/48] [skip ci] Latest fixes add retdec_tests_runner.py --- scripts/retdec_archive_decompiler.py | 10 +- .../retdec_signature_from_library_creator.py | 2 +- scripts/retdec_tests_runner.py | 109 ++++++++++++++++++ scripts/retdec_unpacker.py | 2 +- 4 files changed, 116 insertions(+), 7 deletions(-) create mode 100644 scripts/retdec_tests_runner.py diff --git a/scripts/retdec_archive_decompiler.py b/scripts/retdec_archive_decompiler.py index f7b5ffdda..0719cdd3e 100644 --- a/scripts/retdec_archive_decompiler.py +++ b/scripts/retdec_archive_decompiler.py @@ -98,7 +98,7 @@ def _check_arguments(self): self.use_json_format = True if self.args.arg_list: - self.decompiler_sh_args = ' '.join(self.args.arg_list) + self.decompiler_sh_args = self.args.arg_list if self.args.file: if not (os.path.isfile(self.args.file)): @@ -162,8 +162,8 @@ def decompile_archive(self): # Run the decompilation script over all the found files. print('Running \`%s' % config.DECOMPILER_SH, end='') - if self.decompiler_sh_args != '': - print(self.decompiler_sh_args, end='') + if self.decompiler_sh_args: + print(' '.join(self.decompiler_sh_args), end='') print('\` over %d files with timeout %d s. (run \`kill %d \` to terminate this script)...' % ( self.file_count, self.timeout, os.getpid()), file=sys.stderr) @@ -179,7 +179,7 @@ def decompile_archive(self): # Do not escape! output, _, timeouted = cmd.run_cmd([config.DECOMPILER_SH, '--ar-index=' + str(i), '-o', self.library_path + '.file_' + str(file_index) + '.c', - self.library_path, self.decompiler_sh_args], timeout=self.timeout) + self.library_path, *self.decompiler_sh_args], timeout=self.timeout) with open(log_file, 'wb') as f: f.write(output) @@ -194,7 +194,7 @@ def decompile_archive(self): if __name__ == '__main__': - args = parse_args() + args = parse_args(sys.argv) archive_decompiler = ArchiveDecompiler(args) archive_decompiler.decompile_archive() diff --git a/scripts/retdec_signature_from_library_creator.py b/scripts/retdec_signature_from_library_creator.py index 7daa8cb89..633fd6da0 100644 --- a/scripts/retdec_signature_from_library_creator.py +++ b/scripts/retdec_signature_from_library_creator.py @@ -171,7 +171,7 @@ def run(self): if __name__ == '__main__': - args = parse_args() + args = parse_args(sys.argv) sig = SigFromLib(args) sys.exit(sig.run()) diff --git a/scripts/retdec_tests_runner.py b/scripts/retdec_tests_runner.py new file mode 100644 index 000000000..daeb9c39b --- /dev/null +++ b/scripts/retdec_tests_runner.py @@ -0,0 +1,109 @@ +#! /usr/bin/env python3 + +"""Runs all the installed unit tests.""" + +import sys +import os +import subprocess + +import retdec_config as config + +"""First argument can be verbose.""" +if sys.argv[1] == '-v' or sys.argv[1] == '--verbose': + verbose = True +else: + verbose = False + + +def print_colored(message, color): + """Emits a colored version of the given message to the standard output (without + a new line). + 2 string argument are needed: + $1 message to be colored + $2 color (red, green, yellow) + + If the color is unknown, it emits just $1. 
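# --- Editor's illustration (not part of the patch) ---------------------------
# The verbosity check at the top of retdec_tests_runner.py reads sys.argv[1]
# unconditionally, which raises IndexError when the runner is started with no
# arguments; a guarded membership test keeps the same behaviour and is safe:
import sys

verbose = len(sys.argv) > 1 and sys.argv[1] in ('-v', '--verbose')
print('verbose mode' if verbose else 'quiet mode')
# -----------------------------------------------------------------------------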
+ """ + + if color == 'red': + print('\033[22;31m' + message + '\033[0m') + + elif color == 'green': + print('\033[22;32m' + message + '\033[0m') + + elif color == 'yellow': + print('\033[01;33m' + message + '\033[0m') + + else: + print(message + '\n') + + +def unit_tests_in_dir(path): + """Prints paths to all unit tests in the given directory. + 1 string argument is needed: + $1 path to the directory with unit tests + """ + + """On macOS, find does not support the '-executable' parameter (#238). + Therefore, on macOS, we have to use '-perm +111'. To explain, + means + 'any of these bits' and 111 is the octal representation for the + executable bit on owner, group, and other. Unfortunately, we cannot use + '-perm +111' on all systems because find on Linux/MSYS2 does not support + +. It supports only /, but this is not supported by find on macOS... + Hence, we need an if. + """ + + tests = [] + + for file in os.listdir(path): + if file.startswith('retdec-tests-'): + tests.append(file) + + tests.sort() + + return tests + + +def run_unit_tests_in_dir(path): + """Runs all unit tests in the given directory. + 1 string argument is needed: + + $1 path to the directory with unit tests + + Returns 0 if all tests passed, 1 otherwise. + """ + + tests_failed = False + tests_run = False + + for unit_test in unit_tests_in_dir(path): + print() + unit_test_name = os.popen('sed \'s/^.*/bin///' << '\'' + unit_test + '\'').read().rstrip('\n') + print_colored(unit_test_name, 'yellow') + print() + + + # TODO verbose support + return_code = subprocess.call([unit_test, '--gtest_color=yes'], shell=True) + + if return_code != 0: + tests_failed = True + if return_code >= 127: + # Segfault, floating-point exception, etc. + print_colored('FAILED (return code %d)\n' % return_code, 'red') + tests_run = True + + if tests_failed or not tests_run: + return 1 + else: + return 0 + + +if not os.path.isdir(config.UNIT_TESTS_DIR): + '''Run all binaries in unit test dir.''' + + sys.stderr.write('error: no unit tests found in %s' % config.UNIT_TESTS_DIR) + sys.exit(1) + +print('Running all unit tests in %s...' 
% config.UNIT_TESTS_DIR) +sys.exit(run_unit_tests_in_dir(config.UNIT_TESTS_DIR)) diff --git a/scripts/retdec_unpacker.py b/scripts/retdec_unpacker.py index 46ff367e5..cbc9327f6 100644 --- a/scripts/retdec_unpacker.py +++ b/scripts/retdec_unpacker.py @@ -207,7 +207,7 @@ def unpack_all(self): if __name__ == '__main__': - args = parse_args() + args = parse_args(sys.argv) unpacker = Unpacker(args) _, rc = unpacker.unpack_all() From b6975d06c30fc57146fb98a6cc4535d3a8426784 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20K=C3=B6sel?= Date: Sun, 17 Jun 2018 14:45:55 +0200 Subject: [PATCH 14/48] [skip ci] Check that options are correct + cleanup and fixes --- scripts/retdec_decompiler.py | 577 ++++++----------------------------- 1 file changed, 92 insertions(+), 485 deletions(-) diff --git a/scripts/retdec_decompiler.py b/scripts/retdec_decompiler.py index 3714ae64d..5d85f7cdb 100755 --- a/scripts/retdec_decompiler.py +++ b/scripts/retdec_decompiler.py @@ -266,6 +266,7 @@ def parse_args(): parser.add_argument('--stop-after', dest='stop_after', + choices=['fileinfo', 'unpacker', 'bin2llvmir', 'llvmir2hll'], help='Stop the decompilation after the given tool ' '(supported tools: fileinfo, unpacker, bin2llvmir, llvmir2hll).') @@ -306,6 +307,8 @@ def __init__(self, _args): self.input = '' self.output = '' self.config = '' + self.selected_ranges = [] + self.selected_functions = [] self.arch = '' self.out_unpacked = '' @@ -331,6 +334,79 @@ def check_arguments(self): Utils.print_error('No input file was specified') return False + if not os.access(self.args.input, os.R_OK): + Utils.print_error('The input file \'%s\' does not exist or is not readable' % self.args.input) + return False + + if self.args.max_memory: + if self.args.no_memory_limit: + Utils.print_error('Clashing options: --max-memory and --no-memory-limit') + return False + + try: + max_memory = int(self.args.max_memory) + if max_memory > 0: + return True + except ValueError: + Utils.print_error( + 'Invalid value for --max-memory: %s (expected a positive integer)' % self.args.max_memory) + return False + + if self.args.static_code_archive: + # User provided archive to create signature file from. + if not os.path.isfile(self.args.static_code_archive): + Utils.print_error('Invalid archive file \'%s\'' % self.args.static_code_archive) + return False + + if self.args.static_code_sigfile: + # User provided signature file. + if not os.path.isfile(self.args.static_code_sigfile): + Utils.print_error('Invalid .yara file \'%s\'' % self.args.static_code_sigfile) + return False + + if self.args.selected_ranges: + self.selected_ranges = self.args.selected_ranges.split(',') + self.args.keep_unreachable_funcs = True + + # Check that selected ranges are valid. + for r in self.selected_ranges: + # Check if valid range. + if not Utils.is_range(r): + Utils.print_error( + 'Range %s in option --select-ranges is not a valid decimal (e.g. 123-456) or hexadecimal ' + '(e.g. 0x123-0xabc) range.' % r) + return False + + # Check if first <= last. + ranges = self.selected_ranges.split('-') + # parser line into array + if int(ranges[0]) > int(ranges[1]): + Utils.print_error( + 'Range \'%s\' in option --select-ranges is not a valid range: ' + 'second address must be greater or equal than the first one.' 
% ranges) + return False + + if self.args.selected_functions: + self.selected_functions = self.args.selected_functions.split(',') + self.args.keep_unreachable_funcs = True + + if self.args.no_config: + if self.args.config: + Utils.print_error('Option --no-config can not be used with option --config') + return False + + if self.args.config: + if not os.access(self.args.config, os.R_OK): + Utils.print_error( + 'The input JSON configuration file '' + (CONFIG_DB) + '' does not exist or is not readable') + return False + + if self.args.pdb: + # File containing PDB debug information. + if not os.access(self.args.pdb, os.R_OK): + Utils.print_error('The input PDB file \'%s\' does not exist or is not readable' % self.args.pdb) + return False + # Try to detect desired decompilation mode if not set by user. # We cannot detect 'raw' mode because it overlaps with 'bin' (at least not based on extension). if not self.args.mode: @@ -427,25 +503,6 @@ def check_arguments(self): if self.args.pdb and os.path.exists(self.args.pdb): self.args.pdb = Utils.get_realpath(self.args.pdb) - # Check that selected ranges are valid. - if self.args.selected_ranges: - for r in self.args.selected_ranges: - # Check if valid range. - if not Utils.is_range(r): - Utils.print_error( - 'Range %s in option --select-ranges is not a valid decimal (e.g. 123-456) or hexadecimal ' - '(e.g. 0x123-0xabc) range.' % r) - return False - - # Check if first <= last. - ranges = self.args.selected_ranges.split('-') - # parser line into array - if int(ranges[0]) > int(ranges[1]): - Utils.print_error( - 'Range \'%s\' in option --select-ranges is not a valid range: ' - 'second address must be greater or equal than the first one.' % ranges) - return False - if self.args.arch: self.arch = self.args.arch @@ -623,64 +680,6 @@ def remove_colors(self, text): res = re.compile(r's/\x1b[^m]*m//g') return res.sub('', text) - def timed_kill(self, pid): - """Platform-independent alternative to `ulimit -t` or `timeout`. - Based on http://www.bashcookbook.com/bashinfo/source/bash-4.0/examples/scripts/timeout3 - 1 argument is needed - PID - Returns - 1 if number of arguments is incorrect - 0 otherwise - """ - - """ - PID = pid - # PID of the target process - PROCESS_NAME = os.popen('ps -p ' + PID + ' -o comm --no-heading').read().rstrip('\n') - - if PROCESS_NAME == 'time': - # The program is run through `/usr/bin/time`, so get the PID of the - # child process (the actual program). Otherwise, if we killed - # `/usr/bin/time`, we would obtain no output from it (user time, memory - # usage etc.). - PID = os.popen('ps --ppid ' + PID + ' -o pid --no-heading | head -n1').read().rstrip('\n') - - t = self.timeout - - while t > 0: - time.sleep(1) - - if not subprocess.call(['kill', '-0', PID], shell=True, stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL): - exit(0) - - t = t - 1 - - subprocess.call(['kill_tree', PID, 'SIGKILL'], shell=True, stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL) - - """ - return 0 - - # - # Kill process and all its children. 
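# --- Editor's illustration (not part of the patch) ---------------------------
# The --select-ranges validation above checks each 'A-B' entry and then compares
# the two ends. int(x, 0) accepts both decimal and 0x-prefixed hexadecimal, and
# validating one entry at a time keeps the loop body simple:
def is_valid_range(entry):
    """True for 'A-B' where both ends parse as addresses and A <= B."""
    parts = entry.split('-')
    if len(parts) != 2:
        return False
    try:
        first, last = (int(p, 0) for p in parts)
    except ValueError:
        return False
    return first <= last


assert is_valid_range('123-456')
assert is_valid_range('0x123-0xabc')
assert not is_valid_range('0xabc-0x123')
assert not is_valid_range('123')
# -----------------------------------------------------------------------------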
- # Based on http://stackoverflow.com/questions/392022/best-way-to-kill-all-child-processes/3211182#3211182 - # 2 arguments are needed - PID of process to kill + signal type - # Returns - 1 if number of arguments is incorrect - # 0 otherwise - # - def kill_tree(self, pid, signal_type): - """ TODO implement - _pid = pid - _sig = Expand.colonMinus('2', 'TERM') - _rc0 = subprocess.call(['kill', '-stop', Expand.underbar() + 'pid'], shell=True) - - # needed to stop quickly forking parent from producing child between child killing and parent killing - for _child in os.popen('ps -o pid --no-headers --ppid \'' + Expand.underbar() + 'pid\'').read().rstrip('\n'): - kill_tree(Expand.underbar() + 'child', Expand.underbar() + 'sig') - _rc0 = subprocess.call(['kill', '-' + Expand.underbar() + 'sig', Expand.underbar() + 'pid'], shell=True) - """ - - return 0 - def string_to_md5(self, string): """Generate a MD5 checksum from a given string. """ @@ -690,8 +689,8 @@ def string_to_md5(self, string): return m.hexdigest() def decompile(self): - global TIME cmd = CmdRunner() + # Check arguments and set default values for unset options. if not self.check_arguments(): return 1 @@ -711,9 +710,10 @@ def decompile(self): # Raw. if self.args.mode == 'raw': # Entry point for THUMB must be odd. - if self.args.arch == 'thumb' or (self.args.raw_entry_point % 2) != 0: - self.args.keep_unreachable_funcs = 1 - # RAW_ENTRY_POINT = (RAW_ENTRY_POINT + 1) + if self.args.arch == 'thumb' or (self.args.raw_entry_point % 2) == 0: + self.args.raw_entry_point = (self.args.raw_entry_point + 1) + + self.args.keep_unreachable_funcs = True # Check for archives. if self.args.mode == 'bin': @@ -781,8 +781,8 @@ def decompile(self): if self.args.ar_index: print() print('##### Restoring object file on index '' + (self.args.ar_index) + '' from archive...') - print( - 'RUN: ' + config.AR + ' ' + self.input + ' --index ' + self.args.ar_index + ' --output ' + out_restored) + print('RUN: ' + config.AR + ' ' + self.input + ' --index ' + self.args.ar_index + ' --output ' + + out_restored) if not Utils.archive_get_by_index(self.input, self.args.ar_index, out_restored): self.cleanup() @@ -802,8 +802,8 @@ def decompile(self): elif self.args.ar_name: print() print('##### Restoring object file with name '' + (self.args.ar_name) + '' from archive...') - print( - 'RUN: ' + config.AR + ' ' + self.input + ' --name ' + self.args.ar_name + ' --output ' + out_restored) + print('RUN: ' + config.AR + ' ' + self.input + ' --name ' + self.args.ar_name + ' --output ' + + out_restored) if not Utils.archive_get_by_name(self.input, self.args.ar_name, out_restored): self.cleanup() @@ -813,8 +813,8 @@ def decompile(self): self.input = out_restored else: # Print list of files. - print('Please select file to decompile with either \' --ar-index = n\'') - print('or \' --ar-name = string\' option. Archive contains these files:') + print('Please select file to decompile with either \' --ar-index=n\'') + print('or \' --ar-name=string\' option. Archive contains these files:') Utils.archive_list_numbered_content(self.input) self.cleanup() @@ -828,11 +828,11 @@ def decompile(self): print('Not an archive, going to the next step.') - if self.args.mode == 'bin' or self.args.mode == 'raw': + if self.args.mode in ['bin', 'raw']: # Assignment of other used variables. 
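# --- Editor's illustration (not part of the patch) ---------------------------
# The comment in the raw-mode handling above says the entry point for THUMB
# must be odd (Thumb state is conventionally signalled by the address's lowest
# bit). One direct rendering of that rule, which leaves already-odd values and
# other architectures untouched:
def thumb_adjusted_entry_point(arch, entry_point):
    return entry_point | 1 if arch == 'thumb' else entry_point


assert thumb_adjusted_entry_point('thumb', 0x1000) == 0x1001
assert thumb_adjusted_entry_point('thumb', 0x1001) == 0x1001
assert thumb_adjusted_entry_point('arm', 0x1000) == 0x1000
# -----------------------------------------------------------------------------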
name = os.path.splitext(self.output)[0] - self.out_unpacked = name + '-unpacked' out_frontend = self.output + '.frontend' + self.out_unpacked = name + '-unpacked' self.out_frontend_ll = out_frontend + '.ll' self.out_frontend_bc = out_frontend + '.bc' self.config = self.output + '.json' @@ -842,6 +842,7 @@ def decompile(self): if self.args.config: shutil.copyfile(self.args.config, self.config) + self.config = os.path.abspath(self.args.config) # Preprocess existing file or create a new, empty JSON file. if os.path.isfile(self.config): @@ -965,11 +966,11 @@ def decompile(self): fileinfo_params = ['-c', self.config, '--similarity', '--verbose', self.input] for pd in config.FILEINFO_EXTERNAL_YARA_PRIMARY_CRYPTO_DATABASES: - fileinfo_params.extend(['--crypto ', pd]) + fileinfo_params.extend(['--crypto', pd]) if self.args.fileinfo_use_all_external_patterns: for ed in config.FILEINFO_EXTERNAL_YARA_EXTRA_CRYPTO_DATABASES: - fileinfo_params.extend(['--crypto ', ed]) + fileinfo_params.extend(['--crypto', ed]) if self.args.max_memory: fileinfo_params.extend(['--max-memory', self.args.max_memory]) @@ -1447,400 +1448,6 @@ def thread4(): print() print('##### Done!') - """ - while True: - - if (sys.argv[1]) == '-a' or (sys.argv[1]) == '--arch': - # Target architecture. - if (self.args.arch) != '': - subprocess.call(['print_error', 'Duplicate option: -a|--arch'], shell=True) - if (sys.argv[ - 2]) != 'mips' os.path.exists((sys.argv[2])) '!=' '-a' (sys.argv[2]) != 'arm' os.path.exists((sys.argv[2])) '!=' '-a' (sys.argv[2]) != 'powerpc' os.path.exists((sys.argv[2]))'!=' != '': - subprocess.call(['print_error', - 'Unsupported target architecture '' + (sys.argv[2]) + ''. Supported architectures: Intel x86, ARM, ARM + Thumb, MIPS, PIC32, PowerPC.'], - shell=True) - self.args.arch = sys.argv[2] - subprocess.call(['shift', '2'], shell=True) - elif (sys.argv[1]) == '-e' or (sys.argv[1]) == '--endian': - # Endian. - if ENDIAN != '': - utils.print_error('Duplicate option: -e|--endian') - ENDIAN = sys.argv[2] - subprocess.call(['shift', '2'], shell=True) - elif (sys.argv[1]) == '-h' or (sys.argv[1]) == '--help': - # Help. - print_help() - exit(0) - elif (sys.argv[1]) == '-k' or (sys.argv[1]) == '--keep-unreachable-funcs': - # Keep unreachable functions. - # Do not check if this parameter is a duplicate because when both - # --select-ranges or --select--functions and -k is specified, the - # decompilation fails. - self.args.keep_unreachable_funcs = 1 - subprocess.call(['shift'], shell=True) - elif (sys.argv[1]) == '-l' or (sys.argv[1]) == '--target-language': - # Target language. - if (HLL) != '': - utils.print_error('Duplicate option: -l|--target-language') - if (sys.argv[2]) != 'c' and os.path.exists((sys.argv[2])) != '': - utils.print_error('Unsupported target language '' + (sys.argv[2]) + ''. Supported languages: C, Python.') - HLL = sys.argv[2] - subprocess.call(['shift', '2'], shell=True) - elif (sys.argv[1]) == '-m' or (sys.argv[1]) == '--mode': - # Decompilation mode. - if (MODE) != '': - utils.print_error('Duplicate option: -m|--mode') - if (sys.argv[2]) != 'bin' os.path.exists((sys.argv[2])) '!=' '-a' (sys.argv[2]) != 'raw': - utils.print_error('Unsupported decompilation mode '' + (sys.argv[2]) + ''. Supported modes: bin, ll, raw.') - MODE = sys.argv[2] - subprocess.call(['shift', '2'], shell=True) - elif (sys.argv[1]) == '-o' or (sys.argv[1]) == '--output': - # Output file. 
- if (OUT) != '': - subprocess.call(['print_error', 'Duplicate option: -o|--output'], shell=True) - OUT = sys.argv[2] - subprocess.call(['shift', '2'], shell=True) - elif (sys.argv[1]) == '-p' or (sys.argv[1]) == '--pdb': - # File containing PDB debug information. - if (self.args.pdb) != '': - subprocess.call(['print_error', 'Duplicate option: -p|--pdb'], shell=True) - self.args.pdb = sys.argv[2] - if not os.access, R_OK) ): - subprocess.call( - ['print_error', 'The input PDB file '' + (self.args.pdb) + '' does not exist or is not readable'], - shell=True) - subprocess.call(['shift', '2'], shell=True) - elif (sys.argv[1]) == '--backend-aggressive-opts': - # Enable aggressive optimizations. - if (self.args.backend_aggressive_opts) != '': - subprocess.call(['print_error', 'Duplicate option: --backend-aggressive-opts'], shell=True) - self.args.backend_aggressive_opts = 1 - subprocess.call(['shift'], shell=True) - elif (sys.argv[1]) == '--backend-arithm-expr-evaluator': - # Name of the evaluator of arithmetical expressions. - if (self.args.backend_arithm_expr_evaluator) != '': - subprocess.call(['print_error', 'Duplicate option: --backend-arithm-expr-evaluator'], shell=True) - self.args.backend_arithm_expr_evaluator = sys.argv[2] - subprocess.call(['shift', '2'], shell=True) - elif (sys.argv[1]) == '--backend-call-info-obtainer': - # Name of the obtainer of information about function calls. - if (self.args.backend_call_info_obtainer) != '': - subprocess.call(['print_error', 'Duplicate option: --backend-call-info-obtainer'], shell=True) - self.args.backend_call_info_obtainer = sys.argv[2] - subprocess.call(['shift', '2'], shell=True) - elif (sys.argv[1]) == '--backend-cfg-test': - # Unify the labels in the emitted CFG. - if (self.args.backend_cfg_test) != '': - subprocess.call(['print_error', 'Duplicate option: --backend-cfg-test'], shell=True) - self.args.backend_cfg_test = 1 - subprocess.call(['shift'], shell=True) - elif (sys.argv[1]) == '--backend-disabled-opts': - # List of disabled optimizations in the backend. - if (self.args.backend_disabled_opts) != '': - subprocess.call(['print_error', 'Duplicate option: --backend-disabled-opts'], shell=True) - self.args.backend_disabled_opts = sys.argv[2] - subprocess.call(['shift', '2'], shell=True) - elif (sys.argv[1]) == '--backend-emit-cfg': - # Emit a CFG of each function in the backend IR. - if (self.args.backend_emit_cfg) != '': - subprocess.call(['print_error', 'Duplicate option: --backend-emit-cfg'], shell=True) - self.args.backend_emit_cfg = 1 - subprocess.call(['shift'], shell=True) - elif (sys.argv[1]) == '--backend-emit-cg': - # Emit a CG of the decompiled module in the backend IR. - if (self.args.backend_emit_cg) != '': - subprocess.call(['print_error', 'Duplicate option: --backend-emit-cg'], shell=True) - self.args.backend_emit_cg = 1 - subprocess.call(['shift'], shell=True) - elif (sys.argv[1]) == '--backend-cg-conversion': - # Should the CG from the backend be converted automatically into the desired format?. - if (self.args.backend_cg_conversion) != '': - subprocess.call(['print_error', 'Duplicate option: --backend-cg-conversion'], shell=True) - if (sys.argv[2]) != 'auto' os.path.exists((sys.argv[2]))'!=' != '': - subprocess.call(['print_error', - 'Unsupported CG conversion mode '' + (sys.argv[2]) + ''. 
Supported modes: auto, manual.'], - shell=True) - self.args.backend_cg_conversion = sys.argv[2] - subprocess.call(['shift', '2'], shell=True) - elif (sys.argv[1]) == '--backend-cfg-conversion': - # Should CFGs from the backend be converted automatically into the desired format?. - if (self.args.backend_cfg_conversion) != '': - subprocess.call(['print_error', 'Duplicate option: --backend-cfg-conversion'], shell=True) - if (sys.argv[2]) != 'auto' os.path.exists((sys.argv[2]))'!=' != '': - subprocess.call(['print_error', - 'Unsupported CFG conversion mode '' + (sys.argv[2]) + ''. Supported modes: auto, manual.'], - shell=True) - self.args.backend_cfg_conversion = sys.argv[2] - subprocess.call(['shift', '2'], shell=True) - elif (sys.argv[1]) == '--backend-enabled-opts': - # List of enabled optimizations in the backend. - if (self.args.backend_enabled_opts) != '': - subprocess.call(['print_error', 'Duplicate option: --backend-enabled-opts'], shell=True) - self.args.backend_enabled_opts = sys.argv[2] - subprocess.call(['shift', '2'], shell=True) - elif ((sys.argv[1]) == '--backend-find-patterns'): - # Try to find patterns. - if (self.args.backend_find_patterns) != '': - subprocess.call(['print_error', 'Duplicate option: --backend-find-patterns'], shell=True) - self.args.backend_find_patterns = sys.argv[2] - subprocess.call(['shift', '2'], shell=True) - elif ((sys.argv[1]) == '--backend-force-module-name'): - # Force the module's name in the backend. - if (self.args.backend_force_module_name) != '': - subprocess.call(['print_error', 'Duplicate option: --backend-force-module-name'], shell=True) - self.args.backend_force_module_name = sys.argv[2] - subprocess.call(['shift', '2'], shell=True) - elif ((sys.argv[1]) == '--backend-keep-all-brackets'): - # Keep all brackets. - if (self.args.backend_keep_all_brackets) != '': - subprocess.call(['print_error', 'Duplicate option: --backend-keep-all-brackets'], shell=True) - self.args.backend_keep_all_brackets = 1 - subprocess.call(['shift'], shell=True) - elif ((sys.argv[1]) == '--backend-keep-library-funcs'): - # Keep library functions. - if (self.args.backend_keep_library_funcs) != '': - subprocess.call(['print_error', 'Duplicate option: --backend-keep-library-funcs'], shell=True) - self.args.backend_keep_library_funcs = 1 - subprocess.call(['shift'], shell=True) - elif ((sys.argv[1]) == '--backend-llvmir2bir-converter'): - # Name of the converter of LLVM IR to BIR. - if (self.args.backend_llvmir2bir_converter) != '': - subprocess.call(['print_error', 'Duplicate option: --backend-llvmir2bir-converter'], shell=True) - self.args.backend_llvmir2bir_converter = sys.argv[2] - subprocess.call(['shift', '2'], shell=True) - elif ((sys.argv[1]) == '--backend-no-compound-operators'): - # Do not use compound operators. - if (self.args.backend_no_compound_operators) != '': - subprocess.call(['print_error', 'Duplicate option: --backend-no-compound-operators'], shell=True) - self.args.backend_no_compound_operators = 1 - subprocess.call(['shift'], shell=True) - elif ((sys.argv[1]) == '--backend-no-debug'): - # Emission of debug messages. - if (self.args.backend_no_debug) != '': - subprocess.call(['print_error', 'Duplicate option: --backend-no-debug'], shell=True) - self.args.backend_no_debug = 1 - subprocess.call(['shift'], shell=True) - elif ((sys.argv[1]) == '--backend-no-debug-comments'): - # Emission of debug comments. 
- if (self.args.backend_no_debug_comments) != '': - subprocess.call(['print_error', 'Duplicate option: --backend-no-debug-comments'], shell=True) - self.args.backend_no_debug_comments = 1 - subprocess.call(['shift'], shell=True) - elif ((sys.argv[1]) == '--backend-no-opts'): - # Disable backend optimizations. - if (self.args.backend_no_opts) != '': - subprocess.call(['print_error', 'Duplicate option: --backend-no-opts'], shell=True) - self.args.backend_no_opts = 1 - subprocess.call(['shift'], shell=True) - elif ((sys.argv[1]) == '--backend-no-symbolic-names'): - # Disable the conversion of constant arguments. - if (self.args.backend_no_symbolic_names) != '': - subprocess.call(['print_error', 'Duplicate option: --backend-no-symbolic-names'], shell=True) - self.args.backend_no_symbolic_names = 1 - subprocess.call(['shift'], shell=True) - elif ((sys.argv[1]) == '--backend-no-time-varying-info'): - # Do not emit any time-varying information. - if (self.args.backend_no_time_varying_info) != '': - subprocess.call(['print_error', 'Duplicate option: --backend-no-time-varying-info'], shell=True) - self.args.backend_no_time_varying_info = 1 - subprocess.call(['shift'], shell=True) - elif ((sys.argv[1]) == '--backend-no-var-renaming'): - # Disable renaming of variables in the backend. - if (self.args.backend_no_var_renaming) != '': - subprocess.call(['print_error', 'Duplicate option: --backend-no-var-renaming'], shell=True) - self.args.backend_no_var_renaming = 1 - subprocess.call(['shift'], shell=True) - elif ((sys.argv[1]) == '--backend-semantics'): - # The used semantics in the backend. - if (self.args.backend_semantics) != '': - subprocess.call(['print_error', 'Duplicate option: --backend-semantics'], shell=True) - self.args.backend_semantics = sys.argv[2] - subprocess.call(['shift', '2'], shell=True) - elif ((sys.argv[1]) == '--backend-strict-fpu-semantics'): - # Use strict FPU semantics in the backend. - if (self.args.backend_strict_fpu_semantics) != '': - subprocess.call(['print_error', 'Duplicate option: --backend-strict-fpu-semantics'], shell=True) - self.args.backend_strict_fpu_semantics = 1 - subprocess.call(['shift'], shell=True) - elif ((sys.argv[1]) == '--backend-var-renamer'): - # Used renamer of variable names. - if (self.args.backend_var_renamer) != '': - subprocess.call(['print_error', 'Duplicate option: --backend-var-renamer'], shell=True) - if (sys.argv[ - 2]) != 'address' os.path.exists((sys.argv[2])) '!=' '-a' (sys.argv[2]) != 'readable' os.path.exists((sys.argv[2])) '!=' '-a' (sys.argv[2]) != 'unified': - subprocess.call(['print_error', - 'Unsupported variable renamer '' + (sys.argv[2]) + ''. Supported renamers: address, hungarian, readable, simple, unified.'], - shell=True) - self.args.backend_var_renamer = sys.argv[2] - subprocess.call(['shift', '2'], shell=True) - elif ((sys.argv[1]) == '--raw-entry-point'): - # Entry point address for binary created from raw data. - if (RAW_ENTRY_POINT) != '': - subprocess.call(['print_error', 'Duplicate option: --raw-entry-point'], shell=True) - RAW_ENTRY_POINT = sys.argv[2] - # RAW_ENTRY_POINT='$(($2))' # evaluate hex address - probably not needed - subprocess.call(['shift', '2'], shell=True) - elif ((sys.argv[1]) == '--raw-section-vma'): - # Virtual memory address for section created from raw data. 
- if (RAW_SECTION_VMA) != '': - subprocess.call(['print_error', 'Duplicate option: --raw-section-vma'], shell=True) - RAW_SECTION_VMA = sys.argv[2] - # RAW_SECTION_VMA='$(($2))' # evaluate hex address - probably not needed - subprocess.call(['shift', '2'], shell=True) - elif ((sys.argv[1]) == '--self.cleanup'): - # Cleanup. - if (CLEANUP) != '': - subprocess.call(['print_error', 'Duplicate option: --self.cleanup'], shell=True) - CLEANUP = 1 - subprocess.call(['shift'], shell=True) - elif ((sys.argv[1]) == '--color-for-ida'): - if (self.args.color_for_ida) != '': - subprocess.call(['print_error', 'Duplicate option: --color-for-ida'], shell=True) - self.args.color_for_ida = 1 - subprocess.call(['shift'], shell=True) - elif ((sys.argv[1]) == '--config'): - if (CONFIG_DB) != '': - subprocess.call(['print_error', 'Duplicate option: --config'], shell=True) - if (NO_CONFIG) != '': - subprocess.call(['print_error', 'Option --config can not be used with option --no-config'], - shell=True) - CONFIG_DB = sys.argv[2] - if (not os.access((CONFIG_DB), R_OK) ): - subprocess.call(['print_error', - 'The input JSON configuration file '' + (CONFIG_DB) + '' does not exist or is not readable'], - shell=True) - subprocess.call(['shift', '2'], shell=True) - elif ((sys.argv[1]) == '--no-config'): - if (NO_CONFIG) != '': - subprocess.call(['print_error', 'Duplicate option: --no-config'], shell=True) - if (CONFIG_DB) != '': - subprocess.call(['print_error', 'Option --no-config can not be used with option --config'], - shell=True) - NO_CONFIG = 1 - subprocess.call(['shift'], shell=True) - elif ((sys.argv[1]) == '--graph-format'): - # Format of graph files. - if (self.args.graph_format) != '': - subprocess.call(['print_error', 'Duplicate option: --graph-format'], shell=True) - if (sys.argv[2]) != 'pdf' os.path.exists((sys.argv[2])) '!=' '-a' (sys.argv[2]) != 'svg': - subprocess.call(['print_error', - 'Unsupported graph format '' + (sys.argv[2]) + ''. Supported formats: pdf, png, svg.'], - shell=True) - self.args.graph_format = sys.argv[2] - subprocess.call(['shift', '2'], shell=True) - elif ((sys.argv[1]) == '--select-decode-only'): - if (self.args.selected_decode_only) != '': - subprocess.call(['print_error', 'Duplicate option: --select-decode-only'], shell=True) - self.args.selected_decode_only = 1 - subprocess.call(['shift'], shell=True) - elif ((sys.argv[1]) == '--select-functions'): - # List of selected functions. - if (self.args.selected_functions) != '': - subprocess.call(['print_error', 'Duplicate option: --select-functions'], shell=True) - IFS').setValue(',') - # parser line into array - self.args.keep_unreachable_funcs = 1 - subprocess.call(['shift', '2'], shell=True) - elif ((sys.argv[1]) == '--select-ranges'): - # List of selected ranges. - if (self.args.selected_ranges) != '': - subprocess.call(['print_error', 'Duplicate option: --select-ranges'], shell=True) - self.args.selected_ranges = sys.argv[2] - IFS').setValue(',') - # parser line into array - self.args.keep_unreachable_funcs = 1 - subprocess.call(['shift', '2'], shell=True) - elif ((sys.argv[1]) == '--stop-after'): - # Stop decompilation after the given tool. 
- if (STOP_AFTER) != '': - subprocess.call(['print_error', 'Duplicate option: --stop-after'], shell=True) - STOP_AFTER = sys.argv[2] - if (not re.search('^(fileinfo|unpacker|bin2llvmir|llvmir2hll)' + '$', (STOP_AFTER))): - subprocess.call(['print_error', 'Unsupported tool '' + (STOP_AFTER) + '' for --stop-after'], - shell=True) - subprocess.call(['shift', '2'], shell=True) - elif ((sys.argv[1]) == '--static-code-sigfile'): - # User provided signature file. - if not os.path.isfile((sys.argv[2])): - subprocess.call(['print_error', 'Invalid .yara file '' + (sys.argv[2]) + '''], shell=True) - self.args.static_code_sigfile').setValue('(' + (sys.argv[2]) + ')') - subprocess.call(['shift', '2'], shell=True) - elif ((sys.argv[1]) == '--static-code-archive'): - # User provided archive to create signature file from. - if not os.path.isfile((sys.argv[2])): - subprocess.call(['print_error', 'Invalid archive file '' + (sys.argv[2]) + '''], shell=True) - self.args.static_code_archive').setValue('(' + (sys.argv[2]) + ')') - subprocess.call(['shift', '2'], shell=True) - elif ((sys.argv[1]) == '--no-default-static-signatures'): - self.args.no_default_static_signatures = 1 - subprocess.call(['shift'], shell=True) - elif ((sys.argv[1]) == '--fileinfo-verbose'): - # Enable --verbose mode in fileinfo. - if (self.args.fileinfo_verbose) != '': - subprocess.call(['print_error', 'Duplicate option: --fileinfo-verbose'], shell=True) - self.args.fileinfo_verbose = 1 - subprocess.call(['shift'], shell=True) - elif ((sys.argv[1]) == '--fileinfo-use-all-external-patterns'): - if (FILEINFO_USE_ALL_EXTERNAL_PATTERNS) != '': - subprocess.call(['print_error', 'Duplicate option: --fileinfo-use-all-external-patterns'], - shell=True) - FILEINFO_USE_ALL_EXTERNAL_PATTERNS = 1 - subprocess.call(['shift'], shell=True) - elif ((sys.argv[1]) == '--ar-name'): - # Archive decompilation by name. - if (self.args.ar_name) != '': - subprocess.call(['print_error', 'Duplicate option: --ar-name'], shell=True) - self.args.ar_name = sys.argv[2] - subprocess.call(['shift', '2'], shell=True) - elif ((sys.argv[1]) == '--ar-index'): - # Archive decompilation by index. - if (self.args.ar_index) != '': - subprocess.call(['print_error', 'Duplicate option: --ar-index'], shell=True) - self.args.ar_index = sys.argv[2] - subprocess.call(['shift', '2'], shell=True) - elif ((sys.argv[1]) == '--max-memory'): - if (self.args.max_memory) != '': - subprocess.call(['print_error', 'Duplicate option: --max-memory'], shell=True) - if (self.args.no_memory_limit) != '': - subprocess.call(['print_error', 'Clashing options: --max-memory and --no-memory-limit'], shell=True) - self.args.max_memory = sys.argv[2] - if (not re.search(Str(Glob('^[0-9] + ' + '$')), (self.args.max_memory))): - subprocess.call(['print_error', - 'Invalid value for --max-memory: ' + (self.args.max_memory) + ' (expected a positive integer)'], - shell=True) - subprocess.call(['shift', '2'], shell=True) - elif ((sys.argv[1]) == '--no-memory-limit'): - if (self.args.no_memory_limit) != '': - subprocess.call(['print_error', 'Duplicate option: --no-memory-limit'], shell=True) - if (self.args.max_memory) != '': - subprocess.call(['print_error', 'Clashing options: --max-memory and --no-memory-limit'], shell=True) - self.args.no_memory_limit = 1 - subprocess.call(['shift'], shell=True) - elif ((sys.argv[1]) == '--generate-log'): - # Intentionally undocumented option. - # Used only for internal testing. - # NOT guaranteed it works everywhere (systems other than our internal test machines). 
- if (self.args.generate_log) != '': - subprocess.call(['print_error', 'Duplicate option: --generate-log'], shell=True) - self.args.generate_log = 1 - self.args.no_memory_limit = 1 - subprocess.call(['shift'], shell=True) - elif ((sys.argv[1]) == '--'): - # Input file. - if (Expand.hash() == 2): - IN = sys.argv[2] - if (not os.access((IN), R_OK) ): - subprocess.call( - ['print_error', 'The input file '' + (IN) + '' does not exist or is not readable'], - shell=True) - elif (Expand.hash() > 2): - # Invalid options. - subprocess.call( - ['print_error', 'Invalid options: '' + (sys.argv[2]) + '', '' + (sys.argv[3]) + '' ...'], - shell=True) - break - """ - return 0 From e29b06d2af51bfeea8670e4f1fec0779faf86273 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20K=C3=B6sel?= Date: Sun, 17 Jun 2018 15:26:26 +0200 Subject: [PATCH 15/48] [skip ci] Fixed various errors --- scripts/retdec_archive_decompiler.py | 18 ++++---- scripts/retdec_decompiler.py | 62 +++++++++++++--------------- scripts/retdec_tests_runner.py | 2 +- scripts/retdec_utils.py | 46 ++++++++------------- 4 files changed, 55 insertions(+), 73 deletions(-) diff --git a/scripts/retdec_archive_decompiler.py b/scripts/retdec_archive_decompiler.py index 0719cdd3e..b2961dcf2 100644 --- a/scripts/retdec_archive_decompiler.py +++ b/scripts/retdec_archive_decompiler.py @@ -117,7 +117,7 @@ def decompile_archive(self): # Check arguments if not self._check_arguments(): - return + return 1 # Check for archives packed in Mach-O Universal Binaries. if Utils.is_macho_archive(self.library_path): @@ -126,7 +126,7 @@ def decompile_archive(self): subprocess.call([config.EXTRACT, '--objects', '--json', self.library_path], shell=True) else: subprocess.call([config.EXTRACT, '--objects', self.library_path], shell=True) - # sys.exit(1) + return 1 self.tmp_archive = self.library_path + '.a' subprocess.call([config.EXTRACT, '--best', '--out', self.tmp_archive, self.library_path], shell=True) @@ -135,19 +135,19 @@ def decompile_archive(self): # Check for thin archives. if Utils.has_thin_archive_signature(self.library_path) == 0: self._print_error_plain_or_json('File is a thin archive and cannot be decompiled.') - return + return 1 # Check if file is archive if not Utils.is_valid_archive(self.library_path): self._print_error_plain_or_json('File is not supported archive or is not readable.') - return + return 1 # Check number of files. self.file_count = Utils.archive_object_count(self.library_path) if self.file_count <= 0: self._print_error_plain_or_json('No files found in archive.') - return + return 1 # List only mode. if self.enable_list_mode: @@ -157,7 +157,7 @@ def decompile_archive(self): Utils.archive_list_numbered_content(self.library_path) self._cleanup() - # sys.exit(0) + return 0 # Run the decompilation script over all the found files. 
print('Running \`%s' % config.DECOMPILER_SH, end='') @@ -190,13 +190,11 @@ def decompile_archive(self): print('[OK]') self._cleanup() - # sys.exit(0) + return 0 if __name__ == '__main__': args = parse_args(sys.argv) archive_decompiler = ArchiveDecompiler(args) - archive_decompiler.decompile_archive() - - sys.exit(0) + sys.exit(archive_decompiler.decompile_archive()) diff --git a/scripts/retdec_decompiler.py b/scripts/retdec_decompiler.py index 5d85f7cdb..5e096163e 100755 --- a/scripts/retdec_decompiler.py +++ b/scripts/retdec_decompiler.py @@ -365,7 +365,7 @@ def check_arguments(self): return False if self.args.selected_ranges: - self.selected_ranges = self.args.selected_ranges.split(',') + self.selected_ranges = self.args.selected_ranges.strip().split(',') self.args.keep_unreachable_funcs = True # Check that selected ranges are valid. @@ -375,19 +375,19 @@ def check_arguments(self): Utils.print_error( 'Range %s in option --select-ranges is not a valid decimal (e.g. 123-456) or hexadecimal ' '(e.g. 0x123-0xabc) range.' % r) - return False + return False - # Check if first <= last. - ranges = self.selected_ranges.split('-') - # parser line into array - if int(ranges[0]) > int(ranges[1]): - Utils.print_error( - 'Range \'%s\' in option --select-ranges is not a valid range: ' - 'second address must be greater or equal than the first one.' % ranges) - return False + # Check if first <= last. + ranges = r.split('-') + # parser line into array + if int(ranges[0]) > int(ranges[1]): + Utils.print_error( + 'Range \'%s\' in option --select-ranges is not a valid range: ' + 'second address must be greater or equal than the first one.' % ranges) + return False if self.args.selected_functions: - self.selected_functions = self.args.selected_functions.split(',') + self.selected_functions = self.args.selected_functions.strip().split(',') self.args.keep_unreachable_funcs = True if self.args.no_config: @@ -407,6 +407,8 @@ def check_arguments(self): Utils.print_error('The input PDB file \'%s\' does not exist or is not readable' % self.args.pdb) return False + self.args.pdb = os.path.abspath(self.args.pdb) + # Try to detect desired decompilation mode if not set by user. # We cannot detect 'raw' mode because it overlaps with 'bin' (at least not based on extension). if not self.args.mode: @@ -449,7 +451,7 @@ def check_arguments(self): if not Utils.is_number(self.args.raw_entry_point): Utils.print_error( 'Value in option --raw-entry-point must be decimal (e.g. 123) or hexadecimal value (e.g. 0x123)') - return False + return False if not Utils.is_number(self.args.raw_section_vma): Utils.print_error( @@ -468,40 +470,36 @@ def check_arguments(self): if self.args.ar_index: Utils.print_warning('Option --ar-index is not used in mode ' + self.args.mode) - fname = '' if not self.args.output: # No output file was given, so use the default one. 
- fname = self.args.input - if fname.endswith('ll'): + input_name = self.args.input + if input_name.endswith('ll'): # Suffix .ll - self.output = fname[:-2] + self.args.hll - elif fname.endswith('exe'): + self.output = input_name[:-2] + self.args.hll + elif input_name.endswith('exe'): # Suffix .exe - self.output = fname[:-3] + self.args.hll + self.output = input_name[:-3] + self.args.hll print('Output is: ' + self.output) - elif fname.endswith('elf'): + elif input_name.endswith('elf'): # Suffix .elf - self.output = fname[:-3] + self.args.hll - elif fname.endswith('ihex'): + self.output = input_name[:-3] + self.args.hll + elif input_name.endswith('ihex'): # Suffix .ihex - self.output = fname[:-4] + self.args.hll - elif fname.endswith('macho'): + self.output = input_name[:-4] + self.args.hll + elif input_name.endswith('macho'): # Suffix .macho - self.output = fname[:-5] + self.args.hll + self.output = input_name[:-5] + self.args.hll else: self.output = self.output + PICKED_FILE + '.' + self.args.hll # If the output file name matches the input file name, we have to change the # output file name. Otherwise, the input file gets overwritten. if self.args.input == self.output: - self.output = fname + '.out.' + self.args.hll + self.output = self.args.input + '.out.' + self.args.hll # Convert to absolute paths. - self.input = os.path.abspath(self.args.input) # Utils.get_realpath(self.args.input) - self.output = os.path.abspath(self.output) # Utils.get_realpath(self.output) - - if self.args.pdb and os.path.exists(self.args.pdb): - self.args.pdb = Utils.get_realpath(self.args.pdb) + self.input = os.path.abspath(self.args.input) + self.output = os.path.abspath(self.output) if self.args.arch: self.arch = self.args.arch @@ -523,11 +521,8 @@ def print_warning_if_decompiling_bytecode(self): def check_whether_decompilation_should_be_forcefully_stopped(self, tool_name): """Checks whether the decompilation should be forcefully stopped because of the --stop-after parameter. If so, self.cleanup is run and the script exits with 0. - Arguments: - $1 Name of the tool. - The function expects the $STOP_AFTER variable to be set. """ @@ -538,7 +533,6 @@ def check_whether_decompilation_should_be_forcefully_stopped(self, tool_name): self.cleanup() print() print('#### Forced stop due to - -stop - after %s...' % self.args.stop_after) - # sys.exit(0) return True return False diff --git a/scripts/retdec_tests_runner.py b/scripts/retdec_tests_runner.py index daeb9c39b..ccdff9e7b 100644 --- a/scripts/retdec_tests_runner.py +++ b/scripts/retdec_tests_runner.py @@ -100,7 +100,7 @@ def run_unit_tests_in_dir(path): if not os.path.isdir(config.UNIT_TESTS_DIR): - '''Run all binaries in unit test dir.''' + """Run all binaries in unit test dir.""" sys.stderr.write('error: no unit tests found in %s' % config.UNIT_TESTS_DIR) sys.exit(1) diff --git a/scripts/retdec_utils.py b/scripts/retdec_utils.py index 19dd41860..ba5a2be28 100644 --- a/scripts/retdec_utils.py +++ b/scripts/retdec_utils.py @@ -205,33 +205,18 @@ def remove_dir_forced(path): def is_windows(): return sys.platform in ('win32', 'msys') - @staticmethod - def get_realpath(path): - """Prints the real, physical location of a directory or file, relative or - absolute. - 1 argument is needed - """ - return str(pathlib.Path(path).resolve()) - @staticmethod def print_error(error): - """Print error message to stderr and die. + """Print error message to stderr. 
1 argument is needed Returns - 1 if number of arguments is incorrect """ - # if error is None: - # sys.exit(1) - print('Error: %s' % error, file=sys.stdout) - # sys.exit(1) @staticmethod def print_warning(warning): """Print warning message to stderr. """ - if warning is None: - return - sys.stderr.write('Warning: %s' % warning) @staticmethod @@ -275,7 +260,10 @@ def archive_object_count(path): 1 argument is needed - file path Returns - 1 if error occurred """ - return subprocess.call([config.AR, path, '--object-count'], shell=True) + cmd = CmdRunner() + output, rc, _ = cmd.run_cmd([config.AR, path, '--object-count']) + + return int(output) if rc == 0 else 1 @staticmethod def archive_list_content(path): @@ -283,7 +271,9 @@ def archive_list_content(path): 1 argument is needed - file path Returns - 1 if number of arguments is incorrect """ - return subprocess.call([config.AR, path, '--list', '--no-numbers'], shell=True) + cmd = CmdRunner() + output, _, _ = cmd.run_cmd([config.AR, path, '--list', '--no-numbers']) + print(output) @staticmethod def archive_list_numbered_content(path): @@ -292,7 +282,9 @@ def archive_list_numbered_content(path): Returns - 1 if number of arguments is incorrect """ print('Index\tName') - return subprocess.call([config.AR, path, '--list'], shell=True) + cmd = CmdRunner() + output, _, _ = cmd.run_cmd([config.AR, path, '--list']) + print(output) @staticmethod def archive_list_numbered_content_json(path): @@ -300,7 +292,9 @@ def archive_list_numbered_content_json(path): 1 argument is needed - file path Returns - 1 if number of arguments is incorrect """ - return subprocess.call([config.AR, path, '--list', '--json'], shell=True) + cmd = CmdRunner() + output, _, _ = cmd.run_cmd([config.AR, path, '--list', '--json']) + print(output) @staticmethod def archive_get_by_name(path, name, output): @@ -347,8 +341,7 @@ def is_decimal_number(num): Returns - 0 if string is a valid decimal number. 1 otherwise """ - regex = '^[0-9]+$' - if re.search(regex, str(num)): + if re.search('^[0-9]+$', str(num)): return True else: return False @@ -360,8 +353,7 @@ def is_hexadecimal_number(num): Returns - 0 if string is a valid hexadecimal number. 1 otherwise """ - regex = '^0x[0-9a-fA-F]+$' - if re.search(regex, str(num)): + if re.search('^0x[0-9a-fA-F]+$', str(num)): return True else: return False @@ -388,8 +380,7 @@ def is_decimal_range(num): Returns - 0 if string is a valid decimal range. 
1 otherwise """ - regex = '^[0-9]+-[0-9]+$' - if re.search(regex, str(num)): + if re.search('^[0-9]+-[0-9]+$', str(num)): return True else: return False @@ -401,8 +392,7 @@ def is_hexadecimal_range(num): Returns - 0 if string is a valid hexadecimal range 1 otherwise """ - regex = '^0x[0-9a-fA-F]+-0x[0-9a-fA-F]+$' - if re.search(regex, str(num)): + if re.search('^0x[0-9a-fA-F]+-0x[0-9a-fA-F]+$', str(num)): return True else: return False From 4a91c1c52f6715a22894898513bcb9a5c116cd01 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20K=C3=B6sel?= Date: Sun, 17 Jun 2018 15:52:30 +0200 Subject: [PATCH 16/48] Try to fix running install-share script --- cmake/install-external.cmake | 21 +++++++++++++++++---- cmake/install-share.py | 4 +++- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/cmake/install-external.cmake b/cmake/install-external.cmake index 83061f29a..79e1ce42f 100644 --- a/cmake/install-external.cmake +++ b/cmake/install-external.cmake @@ -1,8 +1,21 @@ install(CODE " - execute_process( - COMMAND python3 \"${CMAKE_SOURCE_DIR}/cmake/install-share.py\" \"${CMAKE_INSTALL_PREFIX}\" - RESULT_VARIABLE INSTALL_SHARE_RES - ) + if (WIN32) + execute_process( + COMMAND py -3 \"${CMAKE_SOURCE_DIR}/cmake/install-share.py\" \"${CMAKE_INSTALL_PREFIX}\" + RESULT_VARIABLE INSTALL_SHARE_RES + ) + elseif(UNIX AND NOT APPLE) + execute_process( + COMMAND python3 \"${CMAKE_SOURCE_DIR}/cmake/install-share.py\" \"${CMAKE_INSTALL_PREFIX}\" + RESULT_VARIABLE INSTALL_SHARE_RES + ) + elseif (APPLE) + execute_process( + COMMAND python \"${CMAKE_SOURCE_DIR}/cmake/install-share.py\" \"${CMAKE_INSTALL_PREFIX}\" + RESULT_VARIABLE INSTALL_SHARE_RES + ) + endif() + if(INSTALL_SHARE_RES) message(FATAL_ERROR \"RetDec share directory installation FAILED\") endif() diff --git a/cmake/install-share.py b/cmake/install-share.py index ee0c5c5a0..4234f36c0 100755 --- a/cmake/install-share.py +++ b/cmake/install-share.py @@ -3,10 +3,12 @@ """ Get RetDec share directory. """ +import sys +print (sys.version) + import hashlib import os import shutil -import sys import tarfile import urllib.request From a53cc43c312148016eee83555a77234de2901fb7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20K=C3=B6sel?= Date: Sun, 17 Jun 2018 19:40:22 +0200 Subject: [PATCH 17/48] Should now work on every os --- cmake/install-external.cmake | 7 +------ cmake/install-share.py | 2 -- 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/cmake/install-external.cmake b/cmake/install-external.cmake index 79e1ce42f..4271eb5a0 100644 --- a/cmake/install-external.cmake +++ b/cmake/install-external.cmake @@ -4,16 +4,11 @@ install(CODE " COMMAND py -3 \"${CMAKE_SOURCE_DIR}/cmake/install-share.py\" \"${CMAKE_INSTALL_PREFIX}\" RESULT_VARIABLE INSTALL_SHARE_RES ) - elseif(UNIX AND NOT APPLE) + else execute_process( COMMAND python3 \"${CMAKE_SOURCE_DIR}/cmake/install-share.py\" \"${CMAKE_INSTALL_PREFIX}\" RESULT_VARIABLE INSTALL_SHARE_RES ) - elseif (APPLE) - execute_process( - COMMAND python \"${CMAKE_SOURCE_DIR}/cmake/install-share.py\" \"${CMAKE_INSTALL_PREFIX}\" - RESULT_VARIABLE INSTALL_SHARE_RES - ) endif() if(INSTALL_SHARE_RES) diff --git a/cmake/install-share.py b/cmake/install-share.py index 4234f36c0..fe2d40fd8 100755 --- a/cmake/install-share.py +++ b/cmake/install-share.py @@ -4,8 +4,6 @@ Get RetDec share directory. 
""" import sys -print (sys.version) - import hashlib import os import shutil From a8de6fd895f131ee095e5ffc676fc6597ee259e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20K=C3=B6sel?= Date: Sun, 17 Jun 2018 19:49:35 +0200 Subject: [PATCH 18/48] Fix compile error --- cmake/install-external.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/install-external.cmake b/cmake/install-external.cmake index 4271eb5a0..dcc8ead5d 100644 --- a/cmake/install-external.cmake +++ b/cmake/install-external.cmake @@ -4,7 +4,7 @@ install(CODE " COMMAND py -3 \"${CMAKE_SOURCE_DIR}/cmake/install-share.py\" \"${CMAKE_INSTALL_PREFIX}\" RESULT_VARIABLE INSTALL_SHARE_RES ) - else + else() execute_process( COMMAND python3 \"${CMAKE_SOURCE_DIR}/cmake/install-share.py\" \"${CMAKE_INSTALL_PREFIX}\" RESULT_VARIABLE INSTALL_SHARE_RES From ea903dddfe690195b0b104e319948f321031730c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20K=C3=B6sel?= Date: Mon, 18 Jun 2018 13:32:55 +0200 Subject: [PATCH 19/48] Convert compile-yara to python --- support/yara_patterns/tools/compile-yara.py | 108 ++++++++++++++++++++ 1 file changed, 108 insertions(+) create mode 100644 support/yara_patterns/tools/compile-yara.py diff --git a/support/yara_patterns/tools/compile-yara.py b/support/yara_patterns/tools/compile-yara.py new file mode 100644 index 000000000..94deb2aca --- /dev/null +++ b/support/yara_patterns/tools/compile-yara.py @@ -0,0 +1,108 @@ +#!/usr/bin/env python3 + +"""Compile and install tool signatures. +Usage: compile-yara.py yarac-path source-path install-path +""" + +import os +import shutil +import subprocess +import sys + + +############################################################################### + +def printErrorAndDie(error): + """Print error and exit with non-zero value. + error - error message. + """ + print('Error: %s.' % error) + sys.exit(1) + + +def compileFiles(input_folder, output_folder): + """Compile yara signatures. + input_folder - input folder + output_folder - output file + """ + + p = subprocess.Popen([yarac, '-w', input_folder + '/*.yara', output_folder]) + out, _ = p.communicate() + + if p.returncode != 0: + printErrorAndDie('yarac failed during compilation of file' + input_folder) + + # Check for errors in output - yarac returns 0 when it should not. + if 'error' in out: + printErrorAndDie('yarac failed during compilation of file ' + input_folder) + + +############################################################################### + +if len(sys.argv) < 2: + printErrorAndDie('missing path to yarac') +yarac = sys.argv[1] + +if len(sys.argv) < 3: + printErrorAndDie('missing path to rules folder') + +rules_dir = sys.argv[2] + +if len(sys.argv) < 4: + printErrorAndDie('missing path to install folder') + +install_dir = sys.argv[3] + +############################################################################### + +# Directory paths. +rules_dir = os.path.join(rules_dir, 'support', 'yara_patterns', 'tools') +install_dir = os.path.join(install_dir, 'share', 'retdec', 'support', 'generic', 'yara_patterns', 'tools') + +############################################################################### + +# Remove old files if present. +if os.path.isfile(install_dir) or os.path.islink(install_dir): + os.unlink(install_dir) +else: + shutil.rmtree(install_dir, ignore_errors=True) + +# Prepare directory structure. 
+os.makedirs(os.path.join(install_dir, 'pe'), exist_ok=True) +os.makedirs(os.path.join(install_dir, 'elf'), exist_ok=True) +os.makedirs(os.path.join(install_dir, 'macho'), exist_ok=True) + +############################################################################### + +print('compiling yara signatures...') + +# Compile PE32 signatures. +compileFiles(os.path.join(rules_dir, 'pe', 'x86'), os.path.join(install_dir, 'pe', 'x86.yarac')) +compileFiles(os.path.join(rules_dir, 'pe', 'arm'), os.path.join(install_dir, 'pe', 'arm.yarac')) + +# Compile PE32+ signatures. +compileFiles(os.path.join(rules_dir, 'pe', 'x64'), os.path.join(install_dir, 'pe', 'x64.yarac')) + +# Compile ELF signatures. +compileFiles(os.path.join(rules_dir, 'elf', 'x86'), os.path.join(install_dir, 'elf', 'x86.yarac')) +compileFiles(os.path.join(rules_dir, 'elf', 'arm'), os.path.join(install_dir, 'elf', 'arm.yarac')) +compileFiles(os.path.join(rules_dir, 'elf', 'ppc'), os.path.join(install_dir, 'elf', 'ppc.yarac')) +compileFiles(os.path.join(rules_dir, 'elf', 'mips'), os.path.join(install_dir, 'elf', 'mips.yarac')) + +# Compile ELF64 signatures. +compileFiles(os.path.join(rules_dir, 'elf', 'x64'), os.path.join(install_dir, 'elf', 'x64.yarac')) +compileFiles(os.path.join(rules_dir, 'elf', 'arm64'), os.path.join(install_dir, 'elf', 'arm64.yarac')) +compileFiles(os.path.join(rules_dir, 'elf', 'ppc64'), os.path.join(install_dir, 'elf', 'ppc64.yarac')) +compileFiles(os.path.join(rules_dir, 'elf', 'mips64'), os.path.join(install_dir, 'elf', 'mips64.yarac')) + +# Compile Mach-O signatures. +compileFiles(os.path.join(rules_dir, 'macho', 'x86'), os.path.join(install_dir, 'macho', 'x86.yarac')) +compileFiles(os.path.join(rules_dir, 'macho', 'arm'), os.path.join(install_dir, 'macho', 'arm.yarac')) +compileFiles(os.path.join(rules_dir, 'macho', 'ppc'), os.path.join(install_dir, 'macho', 'ppc.yarac')) + +# Compile 64-bit Mach-O signatures. +compileFiles(os.path.join(rules_dir, 'macho', 'x64'), os.path.join(install_dir, 'macho', 'x64.yarac')) +compileFiles(os.path.join(rules_dir, 'macho', 'ppc64'), os.path.join(install_dir, 'macho', 'ppc64.yarac')) + +print('signatures compiled successfully') +sys.exit(0) From 6d78c9b1e62c9f7bfd92542b448c3860e1c57fdb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20K=C3=B6sel?= Date: Mon, 18 Jun 2018 13:33:36 +0200 Subject: [PATCH 20/48] [skip ci] Make test runner more portable --- scripts/retdec_tests_runner.py | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/scripts/retdec_tests_runner.py b/scripts/retdec_tests_runner.py index ccdff9e7b..875eedfd1 100644 --- a/scripts/retdec_tests_runner.py +++ b/scripts/retdec_tests_runner.py @@ -44,19 +44,11 @@ def unit_tests_in_dir(path): $1 path to the directory with unit tests """ - """On macOS, find does not support the '-executable' parameter (#238). - Therefore, on macOS, we have to use '-perm +111'. To explain, + means - 'any of these bits' and 111 is the octal representation for the - executable bit on owner, group, and other. Unfortunately, we cannot use - '-perm +111' on all systems because find on Linux/MSYS2 does not support - +. It supports only /, but this is not supported by find on macOS... - Hence, we need an if. 
- """ - tests = [] for file in os.listdir(path): - if file.startswith('retdec-tests-'): + file_name = os.path.basename(file) + if file_name.startswith('retdec-tests-'): tests.append(file) tests.sort() @@ -78,11 +70,10 @@ def run_unit_tests_in_dir(path): for unit_test in unit_tests_in_dir(path): print() - unit_test_name = os.popen('sed \'s/^.*/bin///' << '\'' + unit_test + '\'').read().rstrip('\n') + unit_test_name = os.path.basename(unit_test) print_colored(unit_test_name, 'yellow') print() - # TODO verbose support return_code = subprocess.call([unit_test, '--gtest_color=yes'], shell=True) From c1635da991c4c1a55304c8978c1599d5265241b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20K=C3=B6sel?= Date: Mon, 18 Jun 2018 13:41:49 +0200 Subject: [PATCH 21/48] [skip ci] Use correct code style --- support/yara_patterns/tools/compile-yara.py | 46 ++++++++++----------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/support/yara_patterns/tools/compile-yara.py b/support/yara_patterns/tools/compile-yara.py index 94deb2aca..d065d16a9 100644 --- a/support/yara_patterns/tools/compile-yara.py +++ b/support/yara_patterns/tools/compile-yara.py @@ -12,7 +12,7 @@ ############################################################################### -def printErrorAndDie(error): +def print_error_and_die(error): """Print error and exit with non-zero value. error - error message. """ @@ -20,7 +20,7 @@ def printErrorAndDie(error): sys.exit(1) -def compileFiles(input_folder, output_folder): +def compile_files(input_folder, output_folder): """Compile yara signatures. input_folder - input folder output_folder - output file @@ -30,26 +30,26 @@ def compileFiles(input_folder, output_folder): out, _ = p.communicate() if p.returncode != 0: - printErrorAndDie('yarac failed during compilation of file' + input_folder) + print_error_and_die('yarac failed during compilation of file' + input_folder) # Check for errors in output - yarac returns 0 when it should not. if 'error' in out: - printErrorAndDie('yarac failed during compilation of file ' + input_folder) + print_error_and_die('yarac failed during compilation of file ' + input_folder) ############################################################################### if len(sys.argv) < 2: - printErrorAndDie('missing path to yarac') + print_error_and_die('missing path to yarac') yarac = sys.argv[1] if len(sys.argv) < 3: - printErrorAndDie('missing path to rules folder') + print_error_and_die('missing path to rules folder') rules_dir = sys.argv[2] if len(sys.argv) < 4: - printErrorAndDie('missing path to install folder') + print_error_and_die('missing path to install folder') install_dir = sys.argv[3] @@ -77,32 +77,32 @@ def compileFiles(input_folder, output_folder): print('compiling yara signatures...') # Compile PE32 signatures. -compileFiles(os.path.join(rules_dir, 'pe', 'x86'), os.path.join(install_dir, 'pe', 'x86.yarac')) -compileFiles(os.path.join(rules_dir, 'pe', 'arm'), os.path.join(install_dir, 'pe', 'arm.yarac')) +compile_files(os.path.join(rules_dir, 'pe', 'x86'), os.path.join(install_dir, 'pe', 'x86.yarac')) +compile_files(os.path.join(rules_dir, 'pe', 'arm'), os.path.join(install_dir, 'pe', 'arm.yarac')) # Compile PE32+ signatures. -compileFiles(os.path.join(rules_dir, 'pe', 'x64'), os.path.join(install_dir, 'pe', 'x64.yarac')) +compile_files(os.path.join(rules_dir, 'pe', 'x64'), os.path.join(install_dir, 'pe', 'x64.yarac')) # Compile ELF signatures. 
-compileFiles(os.path.join(rules_dir, 'elf', 'x86'), os.path.join(install_dir, 'elf', 'x86.yarac')) -compileFiles(os.path.join(rules_dir, 'elf', 'arm'), os.path.join(install_dir, 'elf', 'arm.yarac')) -compileFiles(os.path.join(rules_dir, 'elf', 'ppc'), os.path.join(install_dir, 'elf', 'ppc.yarac')) -compileFiles(os.path.join(rules_dir, 'elf', 'mips'), os.path.join(install_dir, 'elf', 'mips.yarac')) +compile_files(os.path.join(rules_dir, 'elf', 'x86'), os.path.join(install_dir, 'elf', 'x86.yarac')) +compile_files(os.path.join(rules_dir, 'elf', 'arm'), os.path.join(install_dir, 'elf', 'arm.yarac')) +compile_files(os.path.join(rules_dir, 'elf', 'ppc'), os.path.join(install_dir, 'elf', 'ppc.yarac')) +compile_files(os.path.join(rules_dir, 'elf', 'mips'), os.path.join(install_dir, 'elf', 'mips.yarac')) # Compile ELF64 signatures. -compileFiles(os.path.join(rules_dir, 'elf', 'x64'), os.path.join(install_dir, 'elf', 'x64.yarac')) -compileFiles(os.path.join(rules_dir, 'elf', 'arm64'), os.path.join(install_dir, 'elf', 'arm64.yarac')) -compileFiles(os.path.join(rules_dir, 'elf', 'ppc64'), os.path.join(install_dir, 'elf', 'ppc64.yarac')) -compileFiles(os.path.join(rules_dir, 'elf', 'mips64'), os.path.join(install_dir, 'elf', 'mips64.yarac')) +compile_files(os.path.join(rules_dir, 'elf', 'x64'), os.path.join(install_dir, 'elf', 'x64.yarac')) +compile_files(os.path.join(rules_dir, 'elf', 'arm64'), os.path.join(install_dir, 'elf', 'arm64.yarac')) +compile_files(os.path.join(rules_dir, 'elf', 'ppc64'), os.path.join(install_dir, 'elf', 'ppc64.yarac')) +compile_files(os.path.join(rules_dir, 'elf', 'mips64'), os.path.join(install_dir, 'elf', 'mips64.yarac')) # Compile Mach-O signatures. -compileFiles(os.path.join(rules_dir, 'macho', 'x86'), os.path.join(install_dir, 'macho', 'x86.yarac')) -compileFiles(os.path.join(rules_dir, 'macho', 'arm'), os.path.join(install_dir, 'macho', 'arm.yarac')) -compileFiles(os.path.join(rules_dir, 'macho', 'ppc'), os.path.join(install_dir, 'macho', 'ppc.yarac')) +compile_files(os.path.join(rules_dir, 'macho', 'x86'), os.path.join(install_dir, 'macho', 'x86.yarac')) +compile_files(os.path.join(rules_dir, 'macho', 'arm'), os.path.join(install_dir, 'macho', 'arm.yarac')) +compile_files(os.path.join(rules_dir, 'macho', 'ppc'), os.path.join(install_dir, 'macho', 'ppc.yarac')) # Compile 64-bit Mach-O signatures. 
-compileFiles(os.path.join(rules_dir, 'macho', 'x64'), os.path.join(install_dir, 'macho', 'x64.yarac')) -compileFiles(os.path.join(rules_dir, 'macho', 'ppc64'), os.path.join(install_dir, 'macho', 'ppc64.yarac')) +compile_files(os.path.join(rules_dir, 'macho', 'x64'), os.path.join(install_dir, 'macho', 'x64.yarac')) +compile_files(os.path.join(rules_dir, 'macho', 'ppc64'), os.path.join(install_dir, 'macho', 'ppc64.yarac')) print('signatures compiled successfully') sys.exit(0) From 500e24e7a45bacbd08cdd225470f21f657eaca94 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20K=C3=B6sel?= Date: Mon, 18 Jun 2018 17:25:07 +0200 Subject: [PATCH 22/48] [skip ci] Decompiler script now runs successfully --- scripts/retdec_decompiler.py | 78 ++++++++++++++++++------------------ 1 file changed, 39 insertions(+), 39 deletions(-) diff --git a/scripts/retdec_decompiler.py b/scripts/retdec_decompiler.py index 5e096163e..938d004c3 100755 --- a/scripts/retdec_decompiler.py +++ b/scripts/retdec_decompiler.py @@ -213,7 +213,7 @@ def parse_args(): help='Put IDA Pro color tags to output C file.') parser.add_argument('--config', - dest='config', + dest='config_db', help='Specify JSON decompilation configuration file.') parser.add_argument('--no-config', @@ -391,14 +391,14 @@ def check_arguments(self): self.args.keep_unreachable_funcs = True if self.args.no_config: - if self.args.config: + if self.args.config_db: Utils.print_error('Option --no-config can not be used with option --config') return False - if self.args.config: - if not os.access(self.args.config, os.R_OK): + if self.args.config_db: + if not os.access(self.args.config_db, os.R_OK): Utils.print_error( - 'The input JSON configuration file '' + (CONFIG_DB) + '' does not exist or is not readable') + 'The input JSON configuration file \'%s\' does not exist or is not readable' % self.args.config_db) return False if self.args.pdb: @@ -426,7 +426,7 @@ def check_arguments(self): if self.args.pdb: Utils.print_warning('Option -p|--pdb is not used in mode ' + self.args.mode) - if not self.args.config or not self.args.no_config: + if not self.args.config_db or not self.args.no_config: Utils.print_error('Option --config or --no-config must be specified in mode ' + self.args.mode) return False @@ -479,7 +479,6 @@ def check_arguments(self): elif input_name.endswith('exe'): # Suffix .exe self.output = input_name[:-3] + self.args.hll - print('Output is: ' + self.output) elif input_name.endswith('elf'): # Suffix .elf self.output = input_name[:-3] + self.args.hll @@ -544,7 +543,7 @@ def cleanup(self): Utils.remove_dir_forced(self.out_frontend_ll) Utils.remove_dir_forced(self.out_frontend_bc) - if self.config != self.args.config: + if self.config != self.args.config_db: Utils.remove_dir_forced(self.config) Utils.remove_dir_forced(self.out_backend_bc) @@ -695,9 +694,7 @@ def decompile(self): # Put the tool log file and tmp file into /tmp because it uses tmpfs. This means that # the data are stored in RAM instead on the disk, which should provide faster access. 
tmp_dir = '/tmp/decompiler_log' - os.makedirs(tmp_dir, exist_ok=True) - file_md5 = self.string_to_md5(self.output) tool_log_file = tmp_dir + '/' + file_md5 + '.tool' @@ -793,6 +790,7 @@ def decompile(self): return 1 self.input = out_restored + # Pick object by name elif self.args.ar_name: print() print('##### Restoring object file with name '' + (self.args.ar_name) + '' from archive...') @@ -831,12 +829,11 @@ def decompile(self): self.out_frontend_bc = out_frontend + '.bc' self.config = self.output + '.json' - if self.config != self.args.config: + if self.config != self.args.config_db: Utils.remove_file_forced(self.config) - if self.args.config: - shutil.copyfile(self.args.config, self.config) - self.config = os.path.abspath(self.args.config) + if self.args.config_db: + shutil.copyfile(self.args.config_db, self.config) # Preprocess existing file or create a new, empty JSON file. if os.path.isfile(self.config): @@ -873,7 +870,7 @@ def decompile(self): fileinfo_params = ['-c', self.config, '--similarity', self.input, '--no-hashes=all'] if self.args.fileinfo_verbose: - fileinfo_params.extend(['-c', self.config, '--similarity', '--verbose', self.input]) + fileinfo_params = ['-c', self.config, '--similarity', '--verbose', self.input] for par in config.FILEINFO_EXTERNAL_YARA_PRIMARY_CRYPTO_DATABASES: fileinfo_params.extend(['--crypto', par]) @@ -921,9 +918,9 @@ def decompile(self): if self.check_whether_decompilation_should_be_forcefully_stopped('fileinfo'): return 0 - ## - ## Unpacking. - ## + # + # Unpacking. + # unpack_params = ['--extended-exit-codes', '--output', self.out_unpacked, self.input] if self.args.max_memory: @@ -941,8 +938,8 @@ def decompile(self): else: _, unpacker_rc = unpacker.unpack_all() - if self.check_whether_decompilation_should_be_forcefully_stopped('unpacker'): - return 0 + if self.check_whether_decompilation_should_be_forcefully_stopped('unpacker'): + return 0 # RET_UNPACK_OK=0 # RET_UNPACKER_NOTHING_TO_DO_OTHERS_OK=1 @@ -1024,6 +1021,7 @@ def decompile(self): # Get object file format. fileformat, _, _ = cmd.run_cmd([config.CONFIGTOOL, self.config, '--read', '--format']) + fileformat = fileformat.lower() # Intel HEX needs architecture to be specified if fileformat in ['ihex']: @@ -1040,6 +1038,7 @@ def decompile(self): cmd.run_cmd([config.CONFIGTOOL, self.config, '--write', '--file-class', '32']) cmd.run_cmd([config.CONFIGTOOL, self.config, '--write', '--endian', self.args.endian]) + ords_dir = '' # Check whether the correct target architecture was specified. if self.arch in ['arm', 'thumb']: ords_dir = config.ARM_ORDS_DIR @@ -1053,9 +1052,8 @@ def decompile(self): self.generate_log() self.cleanup() - Utils.print_error( - 'Unsupported target architecture %s. Supported architectures: Intel x86, ARM, ARM + Thumb, MIPS, PIC32, PowerPC.' - % self.arch) + Utils.print_error('Unsupported target architecture %s. Supported architectures: ' + 'Intel x86, ARM, ARM + Thumb, MIPS, PIC32, PowerPC.' % self.arch) return 1 # Check file class (e.g. 'ELF32', 'ELF64'). At present, we can only decompile 32-bit files. @@ -1074,7 +1072,8 @@ def decompile(self): # Set path to statically linked code signatures. # - # TODO: Using ELF for IHEX is ok, but for raw, we probably should somehow decide between ELF and PE, or use both, for RAW. + # TODO: Using ELF for IHEX is ok, but for raw, we probably should somehow decide between ELF and PE, + # or use both, for RAW. 
sig_format = fileformat if sig_format in ['ihex', 'raw']: @@ -1107,7 +1106,7 @@ def decompile(self): if self.args.keep_unreachable_funcs: cmd.run_cmd([config.CONFIGTOOL, self.config, '--write', '--keep-unreachable-funcs', 'true']) - if self.args.static_code_archive is not None: + if self.args.static_code_archive: # Get signatures from selected archives. if len(self.args.static_code_archive) > 0: print() @@ -1146,12 +1145,13 @@ def decompile(self): # if os.path.isdir(GENERIC_TYPES_DIR): # subprocess.call([config.CONFIGTOOL, CONFIG, '--write', '--types', GENERIC_TYPES_DIR], shell=True) - # Store path of directory with ORD files into config for frontend (note: only directory, not files themselves). + # Store path of directory with ORD files into config for frontend (note: only directory, + # not files themselves). if os.path.isdir(ords_dir): cmd.run_cmd([config.CONFIGTOOL, self.config, '--write', '--ords', ords_dir + os.path.sep]) # Store paths to file with PDB debugging information into config for frontend. - if self.args.pdb and os.path.exists(self.args.pdb): + if self.args.pdb: cmd.run_cmd([config.CONFIGTOOL, self.config, '--write', '--pdb-file', self.args.pdb]) # Store file names of input and output into config for frontend. @@ -1199,9 +1199,10 @@ def decompile(self): # Prevent bin2llvmir from removing unreachable functions. bin2llvmir_params.remove('-unreachable-funcs') - if not self.config and self.args.config: - self.config = self.args.config - bin2llvmir_params.extend(['-config-path', self.config]) + if self.config == '' and self.args.config_db: + self.config = self.args.config_db + + bin2llvmir_params.extend(['-config-path', self.config]) if self.args.max_memory: bin2llvmir_params.extend(['-max-memory', self.args.max_memory]) @@ -1246,8 +1247,8 @@ def thread2(): print(LOG_BIN2LLVMIR_OUTPUT, end='') """ else: - bin22llvmir_out, bin2llvmir_rc, _ = cmd.run_cmd( - [config.BIN2LLVMIR, *bin2llvmir_params, '-o', self.out_backend_bc]) + bin22llvmir_out, bin2llvmir_rc, _ = cmd.run_cmd([config.BIN2LLVMIR, *bin2llvmir_params, '-o', + self.out_backend_bc]) print(bin22llvmir_out) if bin2llvmir_rc != 0: @@ -1256,17 +1257,16 @@ def thread2(): self.cleanup() Utils.print_error('Decompilation to LLVM IR failed') - return + return 1 if self.check_whether_decompilation_should_be_forcefully_stopped('bin2llvmir'): return 0 - # modes 'bin' || 'raw' # LL mode goes straight to backend. if self.args.mode == 'll': self.out_backend_bc = self.input - self.config = self.args.config + self.config = self.args.config_db # Create parameters for the $LLVMIR2HLL call. 
llvmir2hll_params = ['-target-hll=' + self.args.hll, '-var-renamer=' + self.args.backend_var_renamer, @@ -1276,14 +1276,14 @@ def thread2(): '-llvmir2bir-converter=' + self.args.backend_llvmir2bir_converter, '-o', self.output, self.out_backend_bc] - if self.args.backend_no_debug: + if not self.args.backend_no_debug: llvmir2hll_params.append('-enable-debug') - if self.args.backend_no_debug_comments: + if not self.args.backend_no_debug_comments: llvmir2hll_params.append('-emit-debug-comments') - if self.args.config: - llvmir2hll_params.append('-config-path=' + self.args.config) + if self.config: + llvmir2hll_params.append('-config-path=' + self.config) if self.args.backend_semantics: llvmir2hll_params.extend(['-semantics', self.args.backend_semantics]) From 0a2ab46991e10567267ceae7a64ad0b0e7ccc1cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20K=C3=B6sel?= Date: Mon, 18 Jun 2018 17:35:59 +0200 Subject: [PATCH 23/48] Now generates the same output as the bash script --- scripts/retdec_decompiler.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/scripts/retdec_decompiler.py b/scripts/retdec_decompiler.py index 938d004c3..f836f9353 100755 --- a/scripts/retdec_decompiler.py +++ b/scripts/retdec_decompiler.py @@ -1141,9 +1141,8 @@ def decompile(self): cmd.run_cmd([config.CONFIGTOOL, self.config, '--write', '--user-signature', i]) # Store paths of type files into config for frontend. - # TODO doesnt even exist in sh except here - # if os.path.isdir(GENERIC_TYPES_DIR): - # subprocess.call([config.CONFIGTOOL, CONFIG, '--write', '--types', GENERIC_TYPES_DIR], shell=True) + if os.path.isdir(config.GENERIC_TYPES_DIR): + cmd.run_cmd([config.CONFIGTOOL, self.config, '--write', '--types', config.GENERIC_TYPES_DIR]) # Store path of directory with ORD files into config for frontend (note: only directory, # not files themselves). From ef7b5ce91693b6e33a7f5f9469965c039e56ada3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20K=C3=B6sel?= Date: Mon, 18 Jun 2018 17:57:23 +0200 Subject: [PATCH 24/48] Try fixing Travis on macOS --- .travis.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 26933f4a2..577afc242 100644 --- a/.travis.yml +++ b/.travis.yml @@ -55,7 +55,9 @@ install: - if [ "$TRAVIS_OS_NAME" = "osx" ]; then brew install gnu-getopt; fi # bash 4 - if [ "$TRAVIS_OS_NAME" = "osx" ]; then brew install bash; fi - + # python 3 + - if [ "$TRAVIS_OS_NAME" = "osx" ]; then brew install python3; fi + before_script: - eval "${MATRIX_EVAL}" # We need to use newer versions of Flex and Bison on MacOS X (the ones from Homebrew). 
From b6c52a6985364f73a7dbd94c0814bda07df92560 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20K=C3=B6sel?= Date: Mon, 18 Jun 2018 18:04:03 +0200 Subject: [PATCH 25/48] Upgrade python instead --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 577afc242..6ac216a65 100644 --- a/.travis.yml +++ b/.travis.yml @@ -56,7 +56,7 @@ install: # bash 4 - if [ "$TRAVIS_OS_NAME" = "osx" ]; then brew install bash; fi # python 3 - - if [ "$TRAVIS_OS_NAME" = "osx" ]; then brew install python3; fi + - if [ "$TRAVIS_OS_NAME" = "osx" ]; then brew upgrade python; fi before_script: - eval "${MATRIX_EVAL}" From 4fa358a17765814b46de0bf3943ffa8e8741b376 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20K=C3=B6sel?= Date: Mon, 18 Jun 2018 18:30:40 +0200 Subject: [PATCH 26/48] Test scripts in travis --- .travis.yml | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/.travis.yml b/.travis.yml index 6ac216a65..709626370 100644 --- a/.travis.yml +++ b/.travis.yml @@ -81,18 +81,15 @@ script: - cd .. - rm -rf build # Run unit tests. - - if [ "$TRAVIS_OS_NAME" = "osx" ]; then /usr/local/bin/bash retdec-install/bin/retdec-tests-runner.sh; fi - - if [ "$TRAVIS_OS_NAME" = "linux" ]; then ./retdec-install/bin/retdec-tests-runner.sh; fi + - python3 retdec-install/bin/retdec_tests_runner.py; fi # Run the decompilation script. - - if [ "$TRAVIS_OS_NAME" = "osx" ]; then /usr/local/bin/bash retdec-install/bin/retdec-decompiler.sh --help; fi - - if [ "$TRAVIS_OS_NAME" = "linux" ]; then ./retdec-install/bin/retdec-decompiler.sh --help; fi + - python3 retdec-install/bin/retdec_decompiler.py --help; fi # Run a simple decompilation. - echo -e '#include \n#include \nint main()\n{\n printf("hello world\\n");\n return 0;\n}\n' > hello-orig.c - cat hello-orig.c - gcc -m32 -o hello hello-orig.c - ./hello - - if [ "$TRAVIS_OS_NAME" = "osx" ]; then /usr/local/bin/bash retdec-install/bin/retdec-decompiler.sh hello; fi - - if [ "$TRAVIS_OS_NAME" = "linux" ]; then ./retdec-install/bin/retdec-decompiler.sh hello; fi + - python3 retdec-install/bin/retdec_decompiler.py hello; fi - cat hello.c - grep "int main(int argc, char \*\* argv)" hello.c From f8d2ae8f56045ebc28d833551ea56302841c1f06 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20K=C3=B6sel?= Date: Mon, 18 Jun 2018 18:42:08 +0200 Subject: [PATCH 27/48] Fix build --- .travis.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index 709626370..58855a4e6 100644 --- a/.travis.yml +++ b/.travis.yml @@ -81,15 +81,15 @@ script: - cd .. - rm -rf build # Run unit tests. - - python3 retdec-install/bin/retdec_tests_runner.py; fi + - python3 retdec-install/bin/retdec_tests_runner.py # Run the decompilation script. - - python3 retdec-install/bin/retdec_decompiler.py --help; fi + - python3 retdec-install/bin/retdec_decompiler.py --help # Run a simple decompilation. 
- echo -e '#include \n#include \nint main()\n{\n printf("hello world\\n");\n return 0;\n}\n' > hello-orig.c - cat hello-orig.c - gcc -m32 -o hello hello-orig.c - ./hello - - python3 retdec-install/bin/retdec_decompiler.py hello; fi + - python3 retdec-install/bin/retdec_decompiler.py hello - cat hello.c - grep "int main(int argc, char \*\* argv)" hello.c From 90b9ba6aa8cfde3648b1b4a79cea9fd7eaf56e5c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20K=C3=B6sel?= Date: Mon, 18 Jun 2018 18:53:07 +0200 Subject: [PATCH 28/48] Fix path --- .travis.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index 58855a4e6..a54ffa464 100644 --- a/.travis.yml +++ b/.travis.yml @@ -81,15 +81,15 @@ script: - cd .. - rm -rf build # Run unit tests. - - python3 retdec-install/bin/retdec_tests_runner.py + - python3 /retdec-install/bin/retdec_tests_runner.py # Run the decompilation script. - - python3 retdec-install/bin/retdec_decompiler.py --help + - python3 /retdec-install/bin/retdec_decompiler.py --help # Run a simple decompilation. - echo -e '#include \n#include \nint main()\n{\n printf("hello world\\n");\n return 0;\n}\n' > hello-orig.c - cat hello-orig.c - gcc -m32 -o hello hello-orig.c - ./hello - - python3 retdec-install/bin/retdec_decompiler.py hello + - python3 /retdec-install/bin/retdec_decompiler.py hello - cat hello.c - grep "int main(int argc, char \*\* argv)" hello.c From 192c39e92852ce5a6be71f222a78d2adeb5d3173 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20K=C3=B6sel?= Date: Mon, 18 Jun 2018 20:56:23 +0200 Subject: [PATCH 29/48] Update build Small cleanup --- .travis.yml | 6 +-- scripts/CMakeLists.txt | 12 +++++ scripts/retdec_archive_decompiler.py | 9 ++-- scripts/retdec_config.py | 50 +++++++++++-------- scripts/retdec_decompiler.py | 2 +- scripts/retdec_fileinfo.py | 8 +-- .../retdec_signature_from_library_creator.py | 15 +++--- scripts/retdec_tests_runner.py | 2 +- scripts/retdec_unpacker.py | 8 +-- 9 files changed, 66 insertions(+), 46 deletions(-) diff --git a/.travis.yml b/.travis.yml index a54ffa464..58855a4e6 100644 --- a/.travis.yml +++ b/.travis.yml @@ -81,15 +81,15 @@ script: - cd .. - rm -rf build # Run unit tests. - - python3 /retdec-install/bin/retdec_tests_runner.py + - python3 retdec-install/bin/retdec_tests_runner.py # Run the decompilation script. - - python3 /retdec-install/bin/retdec_decompiler.py --help + - python3 retdec-install/bin/retdec_decompiler.py --help # Run a simple decompilation. 
- echo -e '#include \n#include \nint main()\n{\n printf("hello world\\n");\n return 0;\n}\n' > hello-orig.c - cat hello-orig.c - gcc -m32 -o hello hello-orig.c - ./hello - - python3 /retdec-install/bin/retdec_decompiler.py hello + - python3 retdec-install/bin/retdec_decompiler.py hello - cat hello.c - grep "int main(int argc, char \*\* argv)" hello.c diff --git a/scripts/CMakeLists.txt b/scripts/CMakeLists.txt index ea6c726be..7b993617e 100644 --- a/scripts/CMakeLists.txt +++ b/scripts/CMakeLists.txt @@ -9,3 +9,15 @@ endif() install(PROGRAMS "retdec-signature-from-library-creator.sh" DESTINATION bin) install(PROGRAMS "retdec-unpacker.sh" DESTINATION bin) install(PROGRAMS "retdec-utils.sh" DESTINATION bin) + +# copy python scripts +install(PROGRAMS "retdec_config.py" DESTINATION bin) +install(PROGRAMS "retdec_archive_decompiler.py" DESTINATION bin) +install(PROGRAMS "retdec_decompiler.py" DESTINATION bin) +install(PROGRAMS "retdec_fileinfo.py" DESTINATION bin) +if(RETDEC_TESTS) + install(PROGRAMS "retdec_tests_runner.py" DESTINATION bin) +endif() +install(PROGRAMS "retdec_signature_from_library_creator.py" DESTINATION bin) +install(PROGRAMS "retdec_unpacker.py" DESTINATION bin) +install(PROGRAMS "retdec_utils.py" DESTINATION bin) \ No newline at end of file diff --git a/scripts/retdec_archive_decompiler.py b/scripts/retdec_archive_decompiler.py index b2961dcf2..1a78b7b07 100644 --- a/scripts/retdec_archive_decompiler.py +++ b/scripts/retdec_archive_decompiler.py @@ -1,4 +1,4 @@ -#! /usr/bin/env python3 +#!/usr/bin/env python3 import argparse import os @@ -65,7 +65,6 @@ def _print_error_plain_or_json(self, error): print('{') print(' \'error\' : \'' + message + '\'') print('}') - # exit(1) else: # Otherwise print in plain text. Utils.print_error(error) @@ -101,7 +100,7 @@ def _check_arguments(self): self.decompiler_sh_args = self.args.arg_list if self.args.file: - if not (os.path.isfile(self.args.file)): + if not os.path.isfile(self.args.file): Utils.print_error('Input %s is not a valid file.' % self.args.file) return False @@ -160,7 +159,7 @@ def decompile_archive(self): return 0 # Run the decompilation script over all the found files. - print('Running \`%s' % config.DECOMPILER_SH, end='') + print('Running \`%s' % config.DECOMPILER, end='') if self.decompiler_sh_args: print(' '.join(self.decompiler_sh_args), end='') @@ -177,7 +176,7 @@ def decompile_archive(self): log_file = self.library_path + '.file_' + str(file_index) + '.log.verbose' # Do not escape! - output, _, timeouted = cmd.run_cmd([config.DECOMPILER_SH, '--ar-index=' + str(i), '-o', + output, _, timeouted = cmd.run_cmd([config.DECOMPILER, '--ar-index=' + str(i), '-o', self.library_path + '.file_' + str(file_index) + '.c', self.library_path, *self.decompiler_sh_args], timeout=self.timeout) diff --git a/scripts/retdec_config.py b/scripts/retdec_config.py index 8adddc871..d36602410 100644 --- a/scripts/retdec_config.py +++ b/scripts/retdec_config.py @@ -13,9 +13,11 @@ INSTALL_SHARE_DIR = os.path.join(INSTALL_BIN_DIR, '..', 'share', 'retdec') INSTALL_SUPPORT_DIR = os.path.join(INSTALL_SHARE_DIR, 'support') INSTALL_SHARE_YARA_DIR = os.path.join(INSTALL_SUPPORT_DIR, 'generic', 'yara_patterns') + # generic configuration GENERIC_TYPES_DIR = os.path.join(INSTALL_SUPPORT_DIR, 'generic', 'types') GENERIC_SIGNATURES_DIR = os.path.join(INSTALL_SHARE_YARA_DIR, 'static-code') + # ARM-specific configuration ARM_ORDS_DIR = os.path.join(INSTALL_SUPPORT_DIR, 'arm', 'ords') # X86-specific configuration @@ -41,37 +43,43 @@ times. 
This is the reason why it is placed at the very end. """ BIN2LLVMIR_PARAMS_DISABLES = ['-disable-inlining', '-disable-simplify-libcalls'] -BIN2LLVMIR_LLVM_PASSES_ONLY = ['-instcombine', '-tbaa', '-targetlibinfo', '-basicaa', '-domtree', '-simplifycfg', '-domtree', '-early-cse', - '-lower-expect', '-targetlibinfo', '-tbaa', '-basicaa', '-globalopt', '-mem2reg', '-instcombine', - '-simplifycfg', '-basiccg', '-domtree', '-early-cse', '-lazy-value-info', '-jump-threading', - '-correlated-propagation', '-simplifycfg', '-instcombine', '-simplifycfg', '-reassociate', '-domtree', - '-loops', '-loop-simplify', '-lcssa', '-loop-rotate', '-licm', '-lcssa', '-instcombine', '-scalar-evolution', - '-loop-simplifycfg', '-loop-simplify', '-aa', '-loop-accesses', '-loop-load-elim', '-lcssa', '-indvars', - '-loop-idiom', '-loop-deletion', '-memdep', '-gvn', '-memdep', '-sccp', '-instcombine', '-lazy-value-info', - '-jump-threading', '-correlated-propagation', '-domtree', '-memdep', '-dse', '-dce', '-bdce', '-adce', '-die', - '-simplifycfg', '-instcombine', '-strip-dead-prototypes', '-globaldce', '-constmerge', '-constprop', - '-instnamer', '-domtree', '-instcombine'] +BIN2LLVMIR_LLVM_PASSES_ONLY = ['-instcombine', '-tbaa', '-targetlibinfo', '-basicaa', '-domtree', '-simplifycfg', + '-domtree', '-early-cse', '-lower-expect', '-targetlibinfo', '-tbaa', '-basicaa', + '-globalopt', '-mem2reg', '-instcombine', '-simplifycfg', '-basiccg', '-domtree', + '-early-cse', '-lazy-value-info', '-jump-threading', '-correlated-propagation', + '-simplifycfg', '-instcombine', '-simplifycfg', '-reassociate', '-domtree', '-loops', + '-loop-simplify', '-lcssa', '-loop-rotate', '-licm', '-lcssa', '-instcombine', + '-scalar-evolution', '-loop-simplifycfg', '-loop-simplify', '-aa', '-loop-accesses', + '-loop-load-elim', '-lcssa', '-indvars', '-loop-idiom', '-loop-deletion', '-memdep', + '-gvn', '-memdep', '-sccp', '-instcombine', '-lazy-value-info', '-jump-threading', + '-correlated-propagation', '-domtree', '-memdep', '-dse', '-dce', '-bdce', '-adce', + '-die', '-simplifycfg', '-instcombine', '-strip-dead-prototypes', '-globaldce', + '-constmerge', '-constprop', '-instnamer', '-domtree', '-instcombine'] -BIN2LLVMIR_PARAMS = ['-provider-init', '-decoder', '-verify', '-main-detection', '-idioms-libgcc', '-inst-opt', '-register', - '-cond-branch-opt', '-syscalls', '-stack', '-constants', '-param-return', '-local-vars', '-inst-opt', '-simple-types', - '-generate-dsm', '-remove-asm-instrs', '-class-hierarchy', '-select-fncs', '-unreachable-funcs', '-inst-opt', - '-value-protect', *BIN2LLVMIR_LLVM_PASSES_ONLY, *BIN2LLVMIR_LLVM_PASSES_ONLY, '-simple-types', - '-stack-ptr-op-remove', '-inst-opt', '-idioms', '-global-to-local', '-dead-global-assign', '-instcombine', - '-phi2seq', '-value-protect', *BIN2LLVMIR_PARAMS_DISABLES] +BIN2LLVMIR_PARAMS = ['-provider-init', '-decoder', '-verify', '-main-detection', '-idioms-libgcc', '-inst-opt', + '-register', '-cond-branch-opt', '-syscalls', '-stack', '-constants', '-param-return', + '-local-vars', '-inst-opt', '-simple-types', '-generate-dsm', '-remove-asm-instrs', + '-class-hierarchy', '-select-fncs', '-unreachable-funcs', '-inst-opt', '-value-protect', + *BIN2LLVMIR_LLVM_PASSES_ONLY, *BIN2LLVMIR_LLVM_PASSES_ONLY, '-simple-types', + '-stack-ptr-op-remove', '-inst-opt', '-idioms', '-global-to-local', '-dead-global-assign', + '-instcombine', '-phi2seq', '-value-protect', *BIN2LLVMIR_PARAMS_DISABLES] # Paths to tools. 
FILEINFO = os.path.join(INSTALL_BIN_DIR, 'retdec-fileinfo') + FILEINFO_EXTERNAL_YARA_PRIMARY_CRYPTO_DATABASES = [os.path.join(INSTALL_SHARE_YARA_DIR, 'signsrch', 'signsrch.yara')] -FILEINFO_EXTERNAL_YARA_EXTRA_CRYPTO_DATABASES = [os.path.join(INSTALL_SHARE_YARA_DIR, 'signsrch', 'signsrch_regex.yara')] +FILEINFO_EXTERNAL_YARA_EXTRA_CRYPTO_DATABASES = [ + os.path.join(INSTALL_SHARE_YARA_DIR, 'signsrch', 'signsrch_regex.yara')] + AR = os.path.join(INSTALL_BIN_DIR, 'retdec-ar-extractor') BIN2PAT = os.path.join(INSTALL_BIN_DIR, 'retdec-bin2pat') PAT2YARA = os.path.join(INSTALL_BIN_DIR, 'retdec-pat2yara') CONFIGTOOL = os.path.join(INSTALL_BIN_DIR, 'retdec-config') EXTRACT = os.path.join(INSTALL_BIN_DIR, 'retdec-macho-extractor') -DECOMPILER_SH = os.path.join(INSTALL_BIN_DIR, 'retdec-decompiler.sh') -ARCHIVE_DECOMPILER_PY = os.path.join(INSTALL_BIN_DIR, 'retdec_archive_decompiler.py') -SIG_FROM_LIB_PY = os.path.join(INSTALL_BIN_DIR, 'retdec_signature_from_library_creator.py') -UNPACK_PY = os.path.join(INSTALL_BIN_DIR, 'retdec_unpacker.py') +DECOMPILER = os.path.join(INSTALL_BIN_DIR, 'retdec_decompiler.py') +ARCHIVE_DECOMPILER = os.path.join(INSTALL_BIN_DIR, 'retdec_archive_decompiler.py') +SIG_FROM_LIB = os.path.join(INSTALL_BIN_DIR, 'retdec_signature_from_library_creator.py') +UNPACK = os.path.join(INSTALL_BIN_DIR, 'retdec_unpacker.py') LLVMIR2HLL = os.path.join(INSTALL_BIN_DIR, 'retdec-llvmir2hll') BIN2LLVMIR = os.path.join(INSTALL_BIN_DIR, 'retdec-bin2llvmir') IDA_COLORIZER = os.path.join(INSTALL_BIN_DIR, 'retdec-color-c.py') diff --git a/scripts/retdec_decompiler.py b/scripts/retdec_decompiler.py index f836f9353..2cd6a6662 100755 --- a/scripts/retdec_decompiler.py +++ b/scripts/retdec_decompiler.py @@ -1,4 +1,4 @@ -#! /usr/bin/env python3 +#!/usr/bin/env python3 """Decompiles the given file into the selected target high-level language.""" diff --git a/scripts/retdec_fileinfo.py b/scripts/retdec_fileinfo.py index d8a91d946..f1c2bc403 100644 --- a/scripts/retdec_fileinfo.py +++ b/scripts/retdec_fileinfo.py @@ -1,4 +1,4 @@ -#! /usr/bin/env python3 +#!/usr/bin/env python3 """When analyzing an archive, use the archive decompilation script `--list` instead of `fileinfo` because fileinfo is currently unable to analyze archives. @@ -54,8 +54,8 @@ def parse_args(): if args.json: archive_decompiler_args.append('--json') - res = ArchiveDecompiler(archive_decompiler_args).decompile_archive() - sys.exit(res) + decompiler = ArchiveDecompiler(archive_decompiler_args) + sys.exit(decompiler.decompile_archive()) # We are not analyzing an archive, so proceed to fileinfo. fileinfo_params = [args.file] @@ -67,4 +67,4 @@ def parse_args(): for par in config.FILEINFO_EXTERNAL_YARA_EXTRA_CRYPTO_DATABASES: fileinfo_params.extend(['--crypto', par]) - subprocess.call([config.FILEINFO] + fileinfo_params, shell=True) + subprocess.call([config.FILEINFO, *fileinfo_params], shell=True) diff --git a/scripts/retdec_signature_from_library_creator.py b/scripts/retdec_signature_from_library_creator.py index 633fd6da0..7b2dfd772 100644 --- a/scripts/retdec_signature_from_library_creator.py +++ b/scripts/retdec_signature_from_library_creator.py @@ -130,6 +130,7 @@ def run(self): if result != 0: self.print_error_and_cleanup('utility bin2pat failed when processing %s' % lib_path) + return 1 # Remove extracted objects continuously. 
if not self.args.no_cleanup: @@ -142,26 +143,26 @@ def run(self): for d in object_dirs: if os.path.exists(d): shutil.rmtree(d) - # sys.exit(0) - return 1 + return 0 # Create final .yara file from .pat files. if self.args.logfile: result = subprocess.call( - [config.PAT2YARA] + pattern_files + ['--min-pure', str(self.args.min_pure), '-o', self.file_path, '-l', - self.file_path + '.log', self.ignore_nop, - str(self.args.ignore_nops)], + [config.PAT2YARA, *pattern_files, '--min-pure', str(self.args.min_pure), '-o', self.file_path, '-l', + self.file_path + '.log', self.ignore_nop, + str(self.args.ignore_nops)], shell=True) if result != 0: self.print_error_and_cleanup('utility pat2yara failed') else: result = subprocess.call( - [config.PAT2YARA] + pattern_files + ['--min-pure', str(self.args.min_pure), '-o', self.file_path, - self.ignore_nop, str(self.args.ignore_nops)], shell=True) + [config.PAT2YARA, *pattern_files, '--min-pure', str(self.args.min_pure), '-o', self.file_path, + self.ignore_nop, str(self.args.ignore_nops)], shell=True) if result != 0: self.print_error_and_cleanup('utility pat2yara failed') + return 1 # Do cleanup. if not self.args.no_cleanup: diff --git a/scripts/retdec_tests_runner.py b/scripts/retdec_tests_runner.py index 875eedfd1..be6b0aaa6 100644 --- a/scripts/retdec_tests_runner.py +++ b/scripts/retdec_tests_runner.py @@ -1,4 +1,4 @@ -#! /usr/bin/env python3 +#!/usr/bin/env python3 """Runs all the installed unit tests.""" diff --git a/scripts/retdec_unpacker.py b/scripts/retdec_unpacker.py index cbc9327f6..b17bb2e4a 100644 --- a/scripts/retdec_unpacker.py +++ b/scripts/retdec_unpacker.py @@ -106,13 +106,13 @@ def _check_arguments(self): if max_memory > 0: return True except ValueError: - Utils.print_error( - 'Invalid value for --max-memory: %s (expected a positive integer)' % self.args.max_memory) + Utils.print_error('Invalid value for --max-memory: %s (expected a positive integer)' + % self.args.max_memory) return False # Convert to absolute paths. - self.input = os.path.abspath(self.args.file) #Utils.get_realpath(self.args.input) - self.output = os.path.abspath(self.output) #Utils.get_realpath(self.output) + self.input = os.path.abspath(self.args.file) + self.output = os.path.abspath(self.output) return True From e21840f3b8bf92ccfc00af74e9a210ee08d356eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20K=C3=B6sel?= Date: Mon, 18 Jun 2018 21:11:22 +0200 Subject: [PATCH 30/48] Fix error in decompiler script --- scripts/retdec_decompiler.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/scripts/retdec_decompiler.py b/scripts/retdec_decompiler.py index 2cd6a6662..288e6c252 100755 --- a/scripts/retdec_decompiler.py +++ b/scripts/retdec_decompiler.py @@ -327,8 +327,6 @@ def check_arguments(self): """Check proper combination of input arguments. """ - global PICKED_FILE - # Check whether the input file was specified. if not self.args.input: Utils.print_error('No input file was specified') @@ -489,7 +487,7 @@ def check_arguments(self): # Suffix .macho self.output = input_name[:-5] + self.args.hll else: - self.output = self.output + PICKED_FILE + '.' + self.args.hll + self.output = self.args.input + '.' + self.args.hll # If the output file name matches the input file name, we have to change the # output file name. Otherwise, the input file gets overwritten. 
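Note: the one-line fix above drops the leftover PICKED_FILE global and derives the fallback output name directly from the input path. Condensed into a standalone sketch of the default output-name rule; the suffix list and the '.out.' collision rule mirror the script, while the hll='c' default is only for illustration:

import os

# Illustrative condensation of the default output-name logic in
# retdec_decompiler.py (a sketch, not the script's exact code).
KNOWN_SUFFIXES = ('.ll', '.exe', '.elf', '.ihex', '.macho')

def default_output_name(input_path, hll='c'):
    root, ext = os.path.splitext(input_path)
    # Strip a recognised suffix, otherwise keep the whole input name,
    # then append the target high-level-language suffix.
    out = (root if ext in KNOWN_SUFFIXES else input_path) + '.' + hll
    # Make sure the decompiled output never overwrites the input file.
    return out if out != input_path else input_path + '.out.' + hll

For example, an input named 'hello' yields 'hello.c', which is what the Travis script greps for after the sample decompilation.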
From 576e6058adf432ecd0cdf03603bcf53bd8e64f27 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20K=C3=B6sel?= Date: Mon, 18 Jun 2018 21:45:28 +0200 Subject: [PATCH 31/48] Try to debug failure reason Fix test runner Use Python 3.5 on ubuntu --- scripts/retdec_decompiler.py | 6 +++--- scripts/retdec_tests_runner.py | 9 +++++---- scripts/retdec_utils.py | 7 +++++-- 3 files changed, 13 insertions(+), 9 deletions(-) diff --git a/scripts/retdec_decompiler.py b/scripts/retdec_decompiler.py index 288e6c252..ed0d24b1d 100755 --- a/scripts/retdec_decompiler.py +++ b/scripts/retdec_decompiler.py @@ -712,11 +712,12 @@ def decompile(self): if Utils.is_macho_archive(self.input): out_archive = self.output + '.a' - if self.arch: + if self.args.arch: print() print('##### Restoring static library with architecture family ' + self.args.arch + '...') print( - 'RUN: ' + config.EXTRACT + ' --family ' + self.args.arch + ' --out ' + out_archive + ' ' + self.input) + 'RUN: ' + config.EXTRACT + ' --family ' + self.args.arch + ' --out ' + out_archive + ' ' + + self.input) _, extract_rc, _ = cmd.run_cmd( [config.EXTRACT, '--family', self.args.arch, '--out', out_archive, self.input]) @@ -726,7 +727,6 @@ def decompile(self): '\'. File contains these architecture families:') cmd.run_cmd([config.EXTRACT, '--list', self.input]) self.cleanup() - # sys.exit(1) return 1 else: # Pick best architecture diff --git a/scripts/retdec_tests_runner.py b/scripts/retdec_tests_runner.py index be6b0aaa6..d6e85098d 100644 --- a/scripts/retdec_tests_runner.py +++ b/scripts/retdec_tests_runner.py @@ -9,10 +9,11 @@ import retdec_config as config """First argument can be verbose.""" -if sys.argv[1] == '-v' or sys.argv[1] == '--verbose': - verbose = True -else: - verbose = False +if len(sys.argv) > 1: + if sys.argv[1] in ['-v', '--verbose']: + verbose = True + else: + verbose = False def print_colored(message, color): diff --git a/scripts/retdec_utils.py b/scripts/retdec_utils.py index ba5a2be28..009010e79 100644 --- a/scripts/retdec_utils.py +++ b/scripts/retdec_utils.py @@ -331,8 +331,11 @@ def is_macho_archive(path): Returns - 0 if file is archive 1 if file is not archive """ - return subprocess.call([config.EXTRACT, '--check-archive', path], shell=True, - stderr=subprocess.STDOUT, stdout=subprocess.DEVNULL) != 2 + ret = subprocess.call([config.EXTRACT, '--check-archive', path], shell=True, + stderr=subprocess.STDOUT, stdout=subprocess.DEVNULL) + print('Is macho archive returend: ' + str(ret)) + + return ret != 2 @staticmethod def is_decimal_number(num): From 287f7503b0b30c837bf4fed373e7ab1bcd62a67c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20K=C3=B6sel?= Date: Tue, 19 Jun 2018 00:21:48 +0200 Subject: [PATCH 32/48] Use newer Python version and fix some errors --- .travis.yml | 3 ++- scripts/retdec_decompiler.py | 29 ++++++++++++----------- scripts/retdec_tests_runner.py | 2 +- scripts/retdec_unpacker.py | 4 ++-- scripts/retdec_utils.py | 43 +++++++++++++++++----------------- 5 files changed, 42 insertions(+), 39 deletions(-) diff --git a/.travis.yml b/.travis.yml index 58855a4e6..667537e05 100644 --- a/.travis.yml +++ b/.travis.yml @@ -11,6 +11,7 @@ matrix: addons: apt: sources: + - deadsnakes - ubuntu-toolchain-r-test packages: - build-essential @@ -19,7 +20,7 @@ matrix: - g++-4.9 - cmake - perl - - python3 + - python3.5 - flex - bison - autoconf diff --git a/scripts/retdec_decompiler.py b/scripts/retdec_decompiler.py index ed0d24b1d..331173307 100755 --- a/scripts/retdec_decompiler.py +++ b/scripts/retdec_decompiler.py @@ -941,9 
+941,7 @@ def decompile(self): # RET_UNPACK_OK=0 # RET_UNPACKER_NOTHING_TO_DO_OTHERS_OK=1 - # RET_NOTHING_TO_DO=2 # RET_UNPACKER_FAILED_OTHERS_OK=3 - # RET_UNPACKER_FAILED=4 if unpacker_rc == Unpacker.RET_UNPACK_OK or unpacker_rc == Unpacker.RET_UNPACKER_NOTHING_TO_DO_OTHERS_OK \ or unpacker_rc == Unpacker.RET_UNPACKER_FAILED_OTHERS_OK: @@ -1400,26 +1398,29 @@ def thread4(): print('##### Converting .dot files to the desired format...') if self.args.backend_emit_cg and self.args.backend_cg_conversion == 'auto': - print( - 'RUN: dot -T' + self.args.graph_format + ' ' + self.output + '.cg.dot > ' + self.output + '.cg.' + self.args.graph_format) + if Utils.tool_exists('dot'): + print('RUN: dot -T' + self.args.graph_format + ' ' + self.output + '.cg.dot > ' + self.output + '.cg.' + + self.args.graph_format) - cmd.run_cmd(['dot', '-T' + self.args.graph_format, self.output + '.cg.dot'], - stdout=open(self.output + '.cg.' + self.args.graph_format, 'wb')) + cmd.run_cmd(['dot', '-T' + self.args.graph_format, self.output + '.cg.dot'], + stdout=open(self.output + '.cg.' + self.args.graph_format, 'wb')) + else: + print('Please install \'Graphviz\' to generate graphics.') if self.args.backend_emit_cfg and self.args.backend_cfg_conversion == 'auto': - for cfg in glob.glob(self.output + '.cfg.*.dot'): - print('RUN: dot -T' + self.args.graph_format + ' ' + cfg + ' > ' + ( - os.path.splitext(cfg)[0] + '.' + self.args.graph_format)) + if Utils.tool_exists('dot'): + for cfg in glob.glob(self.output + '.cfg.*.dot'): + print('RUN: dot -T' + self.args.graph_format + ' ' + cfg + ' > ' + ( + os.path.splitext(cfg)[0] + '.' + self.args.graph_format)) - cmd.run_cmd(['dot', '-T' + self.args.graph_format, cfg], - stdout=open((os.path.splitext(cfg)[0]) + '.' + self.args.graph_format, 'wb')) + cmd.run_cmd(['dot', '-T' + self.args.graph_format, cfg], + stdout=open((os.path.splitext(cfg)[0]) + '.' + self.args.graph_format, 'wb')) + else: + print('Please install \'Graphviz\' to generate graphics.') # Remove trailing whitespace and the last redundant empty new line from the # generated output (if any). It is difficult to do this in the back-end, so we # do it here. - # Note: Do not use the -i flag (in-place replace) as there is apparently no way - # of getting sed -i to work consistently on both MacOS and Linux. 
- # TODO with open(self.output, 'r') as file: new = [line.rstrip() for line in file] diff --git a/scripts/retdec_tests_runner.py b/scripts/retdec_tests_runner.py index d6e85098d..0c8512b45 100644 --- a/scripts/retdec_tests_runner.py +++ b/scripts/retdec_tests_runner.py @@ -76,7 +76,7 @@ def run_unit_tests_in_dir(path): print() # TODO verbose support - return_code = subprocess.call([unit_test, '--gtest_color=yes'], shell=True) + return_code = subprocess.call([os.path.abspath(unit_test), '--gtest_color=yes'], shell=True) if return_code != 0: tests_failed = True diff --git a/scripts/retdec_unpacker.py b/scripts/retdec_unpacker.py index b17bb2e4a..045805a0e 100644 --- a/scripts/retdec_unpacker.py +++ b/scripts/retdec_unpacker.py @@ -145,7 +145,7 @@ def _unpack(self, output): # UNPACKER_EXIT_CODE_PREPROCESSING_ERROR print('##### Unpacking by using generic unpacker: failed') - if not Utils.is_windows(): + if Utils.tool_exists('upx'): # Do not return -> try the next unpacker # Try to unpack via UPX print() @@ -168,7 +168,7 @@ def _unpack(self, output): # not upx-packed print('##### Unpacking by using UPX: nothing to do') else: - print('##### UPX not available on Windows') + print('##### \'upx\' not available: nothing to do') # Do not return -> try the next unpacker # Return. diff --git a/scripts/retdec_utils.py b/scripts/retdec_utils.py index 009010e79..65275c2ae 100644 --- a/scripts/retdec_utils.py +++ b/scripts/retdec_utils.py @@ -186,6 +186,10 @@ def runProcess(): class Utils: + @staticmethod + def tool_exists(tool_name): + return shutil.which(tool_name) is not None + @staticmethod def remove_file_forced(file): if os.path.exists(file): @@ -223,36 +227,34 @@ def print_warning(warning): def has_archive_signature(path): """Check if file has any ar signature. 1 argument is needed - file path - Returns - 0 if file has ar signature - 1 if number of arguments is incorrect - 2 no signature + Returns - True if file has ar signature + False no signature """ - if subprocess.call([config.AR, path, '--arch-magic'], shell=True): - return 0 - return 2 + ret = subprocess.call([config.AR, path, '--arch-magic'], shell=True) + return ret == 0 @staticmethod def has_thin_archive_signature(path): """Check if file has thin ar signature. 1 argument is needed - file path - Returns - 0 if file has thin ar signature - 1 if number of arguments is incorrect - 2 no signature + Returns - True if file has thin ar signature + False no signature """ - if subprocess.call([config.AR, path, '--thin-magic'], shell=True): - return 0 - return 2 + ret = subprocess.call([config.AR, path, '--thin-magic'], shell=True) + return ret == 0 @staticmethod def is_valid_archive(path): """Check if file is an archive we can work with. 1 argument is needed - file path - Returns - 0 if file is valid archive - 1 if file is invalid archive + Returns - True if file is valid archive + False if file is invalid archive """ # We use our own messages so throw original output away. - return subprocess.call([config.AR, path, '--valid'], shell=True, stderr=subprocess.STDOUT, - stdout=None) + ret = subprocess.call([config.AR, path, '--valid'], shell=True, stderr=subprocess.STDOUT, + stdout=None) + + return ret == 0 @staticmethod def archive_object_count(path): @@ -328,14 +330,13 @@ def archive_get_by_index(archive, index, output): def is_macho_archive(path): """Check if file is Mach-O universal binary with archives. 
1 argument is needed - file path - Returns - 0 if file is archive - 1 if file is not archive + Returns - True if file is archive + False if file is not archive """ ret = subprocess.call([config.EXTRACT, '--check-archive', path], shell=True, - stderr=subprocess.STDOUT, stdout=subprocess.DEVNULL) - print('Is macho archive returend: ' + str(ret)) + stderr=subprocess.STDOUT, stdout=subprocess.DEVNULL) - return ret != 2 + return ret == 0 @staticmethod def is_decimal_number(num): From df5058afb987c35eeda038dbc9e29c6954b0b3b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20K=C3=B6sel?= Date: Tue, 19 Jun 2018 10:49:36 +0200 Subject: [PATCH 33/48] [skip ci] Little cleanup to make the code more clear Don't parse_args twice --- scripts/retdec_archive_decompiler.py | 4 +- scripts/retdec_decompiler.py | 297 +++++++++--------- .../retdec_signature_from_library_creator.py | 6 +- scripts/retdec_tests_runner.py | 2 +- scripts/retdec_unpacker.py | 4 +- 5 files changed, 154 insertions(+), 159 deletions(-) diff --git a/scripts/retdec_archive_decompiler.py b/scripts/retdec_archive_decompiler.py index 1a78b7b07..0c3f82021 100644 --- a/scripts/retdec_archive_decompiler.py +++ b/scripts/retdec_archive_decompiler.py @@ -193,7 +193,5 @@ def decompile_archive(self): if __name__ == '__main__': - args = parse_args(sys.argv) - - archive_decompiler = ArchiveDecompiler(args) + archive_decompiler = ArchiveDecompiler(sys.argv) sys.exit(archive_decompiler.decompile_archive()) diff --git a/scripts/retdec_decompiler.py b/scripts/retdec_decompiler.py index 331173307..4663ca0b9 100755 --- a/scripts/retdec_decompiler.py +++ b/scripts/retdec_decompiler.py @@ -17,7 +17,7 @@ from retdec_utils import Utils, CmdRunner -def parse_args(): +def parse_args(_args): parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.ArgumentDefaultsHelpFormatter) @@ -67,7 +67,6 @@ def parse_args(): parser.add_argument('-p', '--pdb', dest='pdb', metavar='FILE', - default='', help='File with PDB debug information.') parser.add_argument('--generate-log', @@ -151,6 +150,7 @@ def parse_args(): parser.add_argument('--backend-llvmir2bir-converter', dest='backend_llvmir2bir_converter', default='orig', + choices=['orig', 'new'], help='Name of the converter from LLVM IR to BIR.') parser.add_argument('--backend-no-compound-operators', @@ -159,6 +159,7 @@ def parse_args(): parser.add_argument('--backend-no-debug', dest='backend_no_debug', + action='store_true', help='Disables the emission of debug messages, such as phases.') parser.add_argument('--backend-no-debug-comments', @@ -272,12 +273,10 @@ def parse_args(): parser.add_argument('--static-code-sigfile', dest='static_code_sigfile', - default=[], help='Adds additional signature file for static code detection.') parser.add_argument('--static-code-archive', dest='static_code_archive', - default=[], help='Adds additional signature file for static code detection from given archive.') parser.add_argument('--no-default-static-signatures', @@ -302,15 +301,19 @@ def parse_args(): class Decompiler: def __init__(self, _args): - self.args = _args + self.args = parse_args(_args) self.timeout = 300 - self.input = '' - self.output = '' - self.config = '' + + self.input_file = '' + self.output_file = '' + self.config_file = '' self.selected_ranges = [] self.selected_functions = [] - + self.signatures_to_remove = [] self.arch = '' + self.mode = '' + self.pdb_file = '' + self.out_unpacked = '' self.out_frontend_ll = '' self.out_frontend_bc = '' @@ -318,24 +321,22 @@ def __init__(self, _args): 
self.out_backend_ll = '' self.out_restored = '' self.out_archive = '' - self.signatures_to_remove = [] self.tool_log_file = '' - self.TOOL_LOG_FILE = '' - def check_arguments(self): """Check proper combination of input arguments. """ # Check whether the input file was specified. - if not self.args.input: + if self.args.input: + if not os.access(self.args.input, os.R_OK): + Utils.print_error('The input file \'%s\' does not exist or is not readable' % self.args.input) + return False + self.input_file = self.args.input + else: Utils.print_error('No input file was specified') return False - if not os.access(self.args.input, os.R_OK): - Utils.print_error('The input file \'%s\' does not exist or is not readable' % self.args.input) - return False - if self.args.max_memory: if self.args.no_memory_limit: Utils.print_error('Clashing options: --max-memory and --no-memory-limit') @@ -346,8 +347,8 @@ def check_arguments(self): if max_memory > 0: return True except ValueError: - Utils.print_error( - 'Invalid value for --max-memory: %s (expected a positive integer)' % self.args.max_memory) + Utils.print_error('Invalid value for --max-memory: %s (expected a positive integer)' + % self.args.max_memory) return False if self.args.static_code_archive: @@ -405,16 +406,18 @@ def check_arguments(self): Utils.print_error('The input PDB file \'%s\' does not exist or is not readable' % self.args.pdb) return False - self.args.pdb = os.path.abspath(self.args.pdb) + self.pdb_file = os.path.abspath(self.args.pdb) # Try to detect desired decompilation mode if not set by user. # We cannot detect 'raw' mode because it overlaps with 'bin' (at least not based on extension). if not self.args.mode: if self.args.input.endswith('ll'): # Suffix .ll - self.args.mode = 'll' + self.mode = 'll' else: - self.args.mode = 'bin' + self.mode = 'bin' + else: + self.mode = self.args.mode # Print warning message about unsupported combinations of options. if self.args.mode == 'll': @@ -470,33 +473,33 @@ def check_arguments(self): if not self.args.output: # No output file was given, so use the default one. - input_name = self.args.input + input_name = self.input_file if input_name.endswith('ll'): # Suffix .ll - self.output = input_name[:-2] + self.args.hll + self.output_file = input_name[:-2] + self.args.hll elif input_name.endswith('exe'): # Suffix .exe - self.output = input_name[:-3] + self.args.hll + self.output_file = input_name[:-3] + self.args.hll elif input_name.endswith('elf'): # Suffix .elf - self.output = input_name[:-3] + self.args.hll + self.output_file = input_name[:-3] + self.args.hll elif input_name.endswith('ihex'): # Suffix .ihex - self.output = input_name[:-4] + self.args.hll + self.output_file = input_name[:-4] + self.args.hll elif input_name.endswith('macho'): # Suffix .macho - self.output = input_name[:-5] + self.args.hll + self.output_file = input_name[:-5] + self.args.hll else: - self.output = self.args.input + '.' + self.args.hll + self.output_file = self.input_file + '.' + self.args.hll # If the output file name matches the input file name, we have to change the # output file name. Otherwise, the input file gets overwritten. - if self.args.input == self.output: - self.output = self.args.input + '.out.' + self.args.hll + if self.input_file == self.output_file: + self.output_file = self.input_file + '.out.' + self.args.hll # Convert to absolute paths. 
- self.input = os.path.abspath(self.args.input) - self.output = os.path.abspath(self.output) + self.input_file = os.path.abspath(self.input_file) + self.output_file = os.path.abspath(self.output_file) if self.args.arch: self.arch = self.args.arch @@ -507,19 +510,17 @@ def print_warning_if_decompiling_bytecode(self): """Prints a warning if we are decompiling bytecode.""" cmd = CmdRunner() - bytecode, _, _ = cmd.run_cmd([config.CONFIGTOOL, self.config, '--read', '--bytecode']) - # bytecode = os.popen('\'' + config.CONFIGTOOL + '\' \'' + CONFIG + '\' --read --bytecode').read().rstrip('\n') + bytecode, _, _ = cmd.run_cmd([config.CONFIGTOOL, self.config_file, '--read', '--bytecode']) if bytecode != '': - Utils.print_warning( - 'Detected %s bytecode, which cannot be decompiled by our machine-code decompiler.' - ' The decompilation result may be inaccurate.' % bytecode) + Utils.print_warning('Detected %s bytecode, which cannot be decompiled by our machine-code decompiler.' + ' The decompilation result may be inaccurate.' % bytecode) def check_whether_decompilation_should_be_forcefully_stopped(self, tool_name): """Checks whether the decompilation should be forcefully stopped because of the - --stop-after parameter. If so, self.cleanup is run and the script exits with 0. + --stop-after parameter. If so, cleanup is run and the script exits with 0. Arguments: - $1 Name of the tool. + tool_name Name of the tool. The function expects the $STOP_AFTER variable to be set. """ @@ -541,8 +542,8 @@ def cleanup(self): Utils.remove_dir_forced(self.out_frontend_ll) Utils.remove_dir_forced(self.out_frontend_bc) - if self.config != self.args.config_db: - Utils.remove_dir_forced(self.config) + if self.config_file != self.args.config_db: + Utils.remove_dir_forced(self.config_file) Utils.remove_dir_forced(self.out_backend_bc) Utils.remove_dir_forced(self.out_backend_ll) @@ -556,8 +557,8 @@ def cleanup(self): Utils.remove_dir_forced(sig) # Signatures generated from archives - if self.TOOL_LOG_FILE: - Utils.remove_dir_forced(self.TOOL_LOG_FILE) + if self.tool_log_file: + Utils.remove_dir_forced(self.tool_log_file) def generate_log(self): """ @@ -582,7 +583,7 @@ def generate_log(self): ' \'%s\',\n\t\'llvmir2hll_memory\' : \'%s\'\n}\n' print(log_structure % ( - self.input, self.args.pdb, LOG_DECOMPILATION_START_DATE, LOG_DECOMPILATION_END_DATE, self.args.mode, + self.input, self.pdb_file, LOG_DECOMPILATION_START_DATE, LOG_DECOMPILATION_END_DATE, self.mode, self.args.arch, FORMAT, LOG_FILEINFO_RC, LOG_UNPACKER_RC, LOG_BIN2LLVMIR_RC, LOG_LLVMIR2HLL_RC, LOG_FILEINFO_OUTPUT, LOG_UNPACKER_OUTPUT, LOG_BIN2LLVMIR_OUTPUT, LOG_LLVMIR2HLL_OUTPUT, @@ -660,7 +661,6 @@ def get_tool_memory_usage(self, tool): # def json_escape(self, text): # We need to escape backslashes (\), double quotes ('), and replace new lines with '\n'. - return re.escape(text) def remove_colors(self, text): @@ -693,11 +693,11 @@ def decompile(self): # the data are stored in RAM instead on the disk, which should provide faster access. tmp_dir = '/tmp/decompiler_log' os.makedirs(tmp_dir, exist_ok=True) - file_md5 = self.string_to_md5(self.output) + file_md5 = self.string_to_md5(self.output_file) tool_log_file = tmp_dir + '/' + file_md5 + '.tool' # Raw. - if self.args.mode == 'raw': + if self.mode == 'raw': # Entry point for THUMB must be odd. 
if self.args.arch == 'thumb' or (self.args.raw_entry_point % 2) == 0: self.args.raw_entry_point = (self.args.raw_entry_point + 1) @@ -705,75 +705,75 @@ def decompile(self): self.args.keep_unreachable_funcs = True # Check for archives. - if self.args.mode == 'bin': + if self.mode == 'bin': # Check for archives packed in Mach-O Universal Binaries. print('##### Checking if file is a Mach-O Universal static library...') - print('RUN: ' + config.EXTRACT + ' --list ' + self.input) + print('RUN: ' + config.EXTRACT + ' --list ' + self.input_file) - if Utils.is_macho_archive(self.input): - out_archive = self.output + '.a' + if Utils.is_macho_archive(self.input_file): + out_archive = self.output_file + '.a' if self.args.arch: print() print('##### Restoring static library with architecture family ' + self.args.arch + '...') print( 'RUN: ' + config.EXTRACT + ' --family ' + self.args.arch + ' --out ' + out_archive + ' ' - + self.input) + + self.input_file) _, extract_rc, _ = cmd.run_cmd( - [config.EXTRACT, '--family', self.args.arch, '--out', out_archive, self.input]) + [config.EXTRACT, '--family', self.args.arch, '--out', out_archive, self.input_file]) if not extract_rc: # Architecture not supported print('Invalid --arch option \'' + self.args.arch + '\'. File contains these architecture families:') - cmd.run_cmd([config.EXTRACT, '--list', self.input]) + cmd.run_cmd([config.EXTRACT, '--list', self.input_file]) self.cleanup() return 1 else: # Pick best architecture print() print('##### Restoring best static library for decompilation...') - print('RUN: ' + config.EXTRACT + ' --best --out ' + out_archive + ' ' + self.input) - cmd.run_cmd([config.EXTRACT, '--best', '--out', out_archive, self.input]) + print('RUN: ' + config.EXTRACT + ' --best --out ' + out_archive + ' ' + self.input_file) + cmd.run_cmd([config.EXTRACT, '--best', '--out', out_archive, self.input_file]) - self.input = out_archive + self.input_file = out_archive print() print('##### Checking if file is an archive...') - print('RUN: ' + config.AR + ' --arch-magic ' + self.input) + print('RUN: ' + config.AR + ' --arch-magic ' + self.input_file) - if Utils.has_archive_signature(self.input): + if Utils.has_archive_signature(self.input_file): print('This file is an archive!') # Check for thin signature. - if Utils.has_thin_archive_signature(self.input): + if Utils.has_thin_archive_signature(self.input_file): self.cleanup() Utils.print_error('File is a thin archive and cannot be decompiled.') return 1 # Check if our tools can handle it. - if not Utils.is_valid_archive(self.input): + if not Utils.is_valid_archive(self.input_file): self.cleanup() Utils.print_error('The input archive has invalid format.') return 1 # Get and check number of objects. - arch_object_count = Utils.archive_object_count(self.input) + arch_object_count = Utils.archive_object_count(self.input_file) if arch_object_count <= 0: self.cleanup() Utils.print_error('The input archive is empty.') return 1 # Prepare object output path. - out_restored = self.output + '.restored' + out_restored = self.output_file + '.restored' # Pick object by index. 
if self.args.ar_index: print() print('##### Restoring object file on index '' + (self.args.ar_index) + '' from archive...') - print('RUN: ' + config.AR + ' ' + self.input + ' --index ' + self.args.ar_index + ' --output ' + print('RUN: ' + config.AR + ' ' + self.input_file + ' --index ' + self.args.ar_index + ' --output ' + out_restored) - if not Utils.archive_get_by_index(self.input, self.args.ar_index, out_restored): + if not Utils.archive_get_by_index(self.input_file, self.args.ar_index, out_restored): self.cleanup() valid_index = (arch_object_count - 1) @@ -787,26 +787,26 @@ def decompile(self): '\' was not found in the input archive. The only valid index is 0.') return 1 - self.input = out_restored + self.input_file = out_restored # Pick object by name elif self.args.ar_name: print() print('##### Restoring object file with name '' + (self.args.ar_name) + '' from archive...') - print('RUN: ' + config.AR + ' ' + self.input + ' --name ' + self.args.ar_name + ' --output ' + print('RUN: ' + config.AR + ' ' + self.input_file + ' --name ' + self.args.ar_name + ' --output ' + out_restored) - if not Utils.archive_get_by_name(self.input, self.args.ar_name, out_restored): + if not Utils.archive_get_by_name(self.input_file, self.args.ar_name, out_restored): self.cleanup() Utils.print_error('File named %s was not found in the input archive.' % self.args.ar_name) return 1 - self.input = out_restored + self.input_file = out_restored else: # Print list of files. print('Please select file to decompile with either \' --ar-index=n\'') print('or \' --ar-name=string\' option. Archive contains these files:') - Utils.archive_list_numbered_content(self.input) + Utils.archive_list_numbered_content(self.input_file) self.cleanup() return 1 else: @@ -818,57 +818,59 @@ def decompile(self): print('Not an archive, going to the next step.') - if self.args.mode in ['bin', 'raw']: + if self.mode in ['bin', 'raw']: # Assignment of other used variables. - name = os.path.splitext(self.output)[0] - out_frontend = self.output + '.frontend' + name = os.path.splitext(self.output_file)[0] + out_frontend = self.output_file + '.frontend' self.out_unpacked = name + '-unpacked' self.out_frontend_ll = out_frontend + '.ll' self.out_frontend_bc = out_frontend + '.bc' - self.config = self.output + '.json' + self.config_file = self.output_file + '.json' - if self.config != self.args.config_db: - Utils.remove_file_forced(self.config) + if self.config_file != self.args.config_db: + Utils.remove_file_forced(self.config_file) if self.args.config_db: - shutil.copyfile(self.args.config_db, self.config) + shutil.copyfile(self.args.config_db, self.config_file) # Preprocess existing file or create a new, empty JSON file. - if os.path.isfile(self.config): - cmd.run_cmd([config.CONFIGTOOL, self.config, '--preprocess']) + if os.path.isfile(self.config_file): + cmd.run_cmd([config.CONFIGTOOL, self.config_file, '--preprocess']) else: - with open(self.config, 'w') as f: + with open(self.config_file, 'w') as f: f.write('{}') # Raw data needs architecture, endianess and optionaly sections's vma and entry point to be specified. 
- if self.args.mode == 'raw': + if self.mode == 'raw': if not self.arch or self.arch == 'unknown' or self.arch == '': - Utils.print_error('Option -a|--arch must be used with mode ' + self.args.mode) + Utils.print_error('Option -a|--arch must be used with mode ' + self.mode) return 1 if not self.args.endian: - Utils.print_error('Option -e|--endian must be used with mode ' + self.args.mode) + Utils.print_error('Option -e|--endian must be used with mode ' + self.mode) return 1 - cmd.run_cmd([config.CONFIGTOOL, self.config, '--write', '--format', 'raw']) - cmd.run_cmd([config.CONFIGTOOL, self.config, '--write', '--arch', self.arch]) - cmd.run_cmd([config.CONFIGTOOL, self.config, '--write', '--bit-size', '32']) - cmd.run_cmd([config.CONFIGTOOL, self.config, '--write', '--file-class', '32']) - cmd.run_cmd([config.CONFIGTOOL, self.config, '--write', '--endian', self.args.endian]) + cmd.run_cmd([config.CONFIGTOOL, self.config_file, '--write', '--format', 'raw']) + cmd.run_cmd([config.CONFIGTOOL, self.config_file, '--write', '--arch', self.arch]) + cmd.run_cmd([config.CONFIGTOOL, self.config_file, '--write', '--bit-size', '32']) + cmd.run_cmd([config.CONFIGTOOL, self.config_file, '--write', '--file-class', '32']) + cmd.run_cmd([config.CONFIGTOOL, self.config_file, '--write', '--endian', self.args.endian]) if self.args.raw_entry_point: - cmd.run_cmd([config.CONFIGTOOL, self.config, '--write', '--entry-point', self.args.raw_entry_point]) + cmd.run_cmd([config.CONFIGTOOL, self.config_file, '--write', '--entry-point', + self.args.raw_entry_point]) if self.args.raw_section_vma: - cmd.run_cmd([config.CONFIGTOOL, self.config, '--write', '--section-vma', self.args.raw_section_vma]) + cmd.run_cmd([config.CONFIGTOOL, self.config_file, '--write', '--section-vma', + self.args.raw_section_vma]) # # Call fileinfo to create an initial config file. # - fileinfo_params = ['-c', self.config, '--similarity', self.input, '--no-hashes=all'] + fileinfo_params = ['-c', self.config_file, '--similarity', self.input_file, '--no-hashes=all'] if self.args.fileinfo_verbose: - fileinfo_params = ['-c', self.config, '--similarity', '--verbose', self.input] + fileinfo_params = ['-c', self.config_file, '--similarity', '--verbose', self.input_file] for par in config.FILEINFO_EXTERNAL_YARA_PRIMARY_CRYPTO_DATABASES: fileinfo_params.extend(['--crypto', par]) @@ -919,7 +921,7 @@ def decompile(self): # # Unpacking. # - unpack_params = ['--extended-exit-codes', '--output', self.out_unpacked, self.input] + unpack_params = ['--extended-exit-codes', '--output', self.out_unpacked, self.input_file] if self.args.max_memory: unpack_params.extend(['--max-memory', self.args.max_memory]) @@ -946,11 +948,11 @@ def decompile(self): or unpacker_rc == Unpacker.RET_UNPACKER_FAILED_OTHERS_OK: # Successfully unpacked -> re-run fileinfo to obtain fresh information. - self.input = self.out_unpacked - fileinfo_params = ['-c', self.config, '--similarity', self.input, '--no-hashes=all'] + self.input_file = self.out_unpacked + fileinfo_params = ['-c', self.config_file, '--similarity', self.input_file, '--no-hashes=all'] if self.args.fileinfo_verbose: - fileinfo_params = ['-c', self.config, '--similarity', '--verbose', self.input] + fileinfo_params = ['-c', self.config_file, '--similarity', '--verbose', self.input_file] for pd in config.FILEINFO_EXTERNAL_YARA_PRIMARY_CRYPTO_DATABASES: fileinfo_params.extend(['--crypto', pd]) @@ -1006,17 +1008,17 @@ def decompile(self): # Check whether the architecture was specified. 
if self.arch: - cmd.run_cmd([config.CONFIGTOOL, self.config, '--write', '--arch', self.arch]) + cmd.run_cmd([config.CONFIGTOOL, self.config_file, '--write', '--arch', self.arch]) else: # Get full name of the target architecture including comments in parentheses - arch_full, _, _ = cmd.run_cmd([config.CONFIGTOOL, self.config, '--read', '--arch']) + arch_full, _, _ = cmd.run_cmd([config.CONFIGTOOL, self.config_file, '--read', '--arch']) arch_full = arch_full.lower() # Strip comments in parentheses and all trailing whitespace self.arch = arch_full.strip() # Get object file format. - fileformat, _, _ = cmd.run_cmd([config.CONFIGTOOL, self.config, '--read', '--format']) + fileformat, _, _ = cmd.run_cmd([config.CONFIGTOOL, self.config_file, '--read', '--format']) fileformat = fileformat.lower() # Intel HEX needs architecture to be specified @@ -1029,10 +1031,10 @@ def decompile(self): Utils.print_error('Option -e|--endian must be used with format ' + fileformat) return 1 - cmd.run_cmd([config.CONFIGTOOL, self.config, '--write', '--arch', self.arch]) - cmd.run_cmd([config.CONFIGTOOL, self.config, '--write', '--bit-size', '32']) - cmd.run_cmd([config.CONFIGTOOL, self.config, '--write', '--file-class', '32']) - cmd.run_cmd([config.CONFIGTOOL, self.config, '--write', '--endian', self.args.endian]) + cmd.run_cmd([config.CONFIGTOOL, self.config_file, '--write', '--arch', self.arch]) + cmd.run_cmd([config.CONFIGTOOL, self.config_file, '--write', '--bit-size', '32']) + cmd.run_cmd([config.CONFIGTOOL, self.config_file, '--write', '--file-class', '32']) + cmd.run_cmd([config.CONFIGTOOL, self.config_file, '--write', '--endian', self.args.endian]) ords_dir = '' # Check whether the correct target architecture was specified. @@ -1054,7 +1056,7 @@ def decompile(self): # Check file class (e.g. 'ELF32', 'ELF64'). At present, we can only decompile 32-bit files. # Note: we prefer to report the 'unsupported architecture' error (above) than this 'generic' error. - fileclass, _, _ = cmd.run_cmd([config.CONFIGTOOL, self.config, '--read', '--file-class']) + fileclass, _, _ = cmd.run_cmd([config.CONFIGTOOL, self.config_file, '--read', '--file-class']) if fileclass not in ['16', '32']: if self.args.generate_log: @@ -1075,7 +1077,7 @@ def decompile(self): if sig_format in ['ihex', 'raw']: sig_format = 'elf' - endian_result, _, _ = cmd.run_cmd([config.CONFIGTOOL, self.config, '--read', '--endian']) + endian_result, _, _ = cmd.run_cmd([config.CONFIGTOOL, self.config_file, '--read', '--endian']) if endian_result == 'little': sig_endian = 'le' @@ -1100,7 +1102,7 @@ def decompile(self): # Decompile unreachable functions. if self.args.keep_unreachable_funcs: - cmd.run_cmd([config.CONFIGTOOL, self.config, '--write', '--keep-unreachable-funcs', 'true']) + cmd.run_cmd([config.CONFIGTOOL, self.config_file, '--write', '--keep-unreachable-funcs', 'true']) if self.args.static_code_archive: # Get signatures from selected archives. @@ -1115,12 +1117,12 @@ def decompile(self): # TODO replace command crop_arch_path, _, _ = cmd.run_cmd( 'basename \'' + lib + '\' | LC_ALL=C sed -e \'s/[^A-Za-z0-9_.-]/_/g\'') - sig_out = self.output + '.' + crop_arch_path + '.' + lib_index + '.yara' + sig_out = self.output_file + '.' + crop_arch_path + '.' 
+ lib_index + '.yara' # Call sig from lib tool sig_from_lib = SigFromLib([lib, '--output', sig_out]) if sig_from_lib.run(): - cmd.run_cmd([config.CONFIGTOOL, self.config, '--write', '--user-signature', sig_out]) + cmd.run_cmd([config.CONFIGTOOL, self.config_file, '--write', '--user-signature', sig_out]) self.signatures_to_remove.append(sig_out) else: Utils.print_warning('Failed extracting signatures from file \'' + lib + '\'') @@ -1129,45 +1131,46 @@ def decompile(self): # Store paths of signature files into config for frontend. if not self.args.no_default_static_signatures: - cmd.run_cmd([config.CONFIGTOOL, self.config, '--write', '--signatures', signatures_dir]) + cmd.run_cmd([config.CONFIGTOOL, self.config_file, '--write', '--signatures', signatures_dir]) # User provided signatures. if self.args.static_code_sigfile: for i in self.args.static_code_sigfile: - cmd.run_cmd([config.CONFIGTOOL, self.config, '--write', '--user-signature', i]) + cmd.run_cmd([config.CONFIGTOOL, self.config_file, '--write', '--user-signature', i]) # Store paths of type files into config for frontend. if os.path.isdir(config.GENERIC_TYPES_DIR): - cmd.run_cmd([config.CONFIGTOOL, self.config, '--write', '--types', config.GENERIC_TYPES_DIR]) + cmd.run_cmd([config.CONFIGTOOL, self.config_file, '--write', '--types', config.GENERIC_TYPES_DIR]) # Store path of directory with ORD files into config for frontend (note: only directory, # not files themselves). if os.path.isdir(ords_dir): - cmd.run_cmd([config.CONFIGTOOL, self.config, '--write', '--ords', ords_dir + os.path.sep]) + cmd.run_cmd([config.CONFIGTOOL, self.config_file, '--write', '--ords', ords_dir + os.path.sep]) # Store paths to file with PDB debugging information into config for frontend. - if self.args.pdb: - cmd.run_cmd([config.CONFIGTOOL, self.config, '--write', '--pdb-file', self.args.pdb]) + if self.pdb_file: + cmd.run_cmd([config.CONFIGTOOL, self.config_file, '--write', '--pdb-file', self.pdb_file]) # Store file names of input and output into config for frontend. - cmd.run_cmd([config.CONFIGTOOL, self.config, '--write', '--input-file', self.input]) - cmd.run_cmd([config.CONFIGTOOL, self.config, '--write', '--frontend-output-file', self.out_frontend_ll]) - cmd.run_cmd([config.CONFIGTOOL, self.config, '--write', '--output-file', self.output]) + cmd.run_cmd([config.CONFIGTOOL, self.config_file, '--write', '--input-file', self.input_file]) + cmd.run_cmd([config.CONFIGTOOL, self.config_file, '--write', '--frontend-output-file', + self.out_frontend_ll]) + cmd.run_cmd([config.CONFIGTOOL, self.config_file, '--write', '--output-file', self.output_file]) # Store decode only selected parts flag. if self.args.selected_decode_only: - cmd.run_cmd([config.CONFIGTOOL, self.config, '--write', '--decode-only-selected', 'true']) + cmd.run_cmd([config.CONFIGTOOL, self.config_file, '--write', '--decode-only-selected', 'true']) else: - cmd.run_cmd([config.CONFIGTOOL, self.config, '--write', '--decode-only-selected', 'false']) + cmd.run_cmd([config.CONFIGTOOL, self.config_file, '--write', '--decode-only-selected', 'false']) # Store selected functions or selected ranges into config for frontend. 
if self.args.selected_functions: for f in self.args.selected_functions: - cmd.run_cmd([config.CONFIGTOOL, self.config, '--write', '--selected-func', f]) + cmd.run_cmd([config.CONFIGTOOL, self.config_file, '--write', '--selected-func', f]) if self.args.selected_ranges: for r in self.args.selected_ranges: - cmd.run_cmd([config.CONFIGTOOL, self.config, '--write', '--selected-range', r]) + cmd.run_cmd([config.CONFIGTOOL, self.config_file, '--write', '--selected-range', r]) # Assignment of other used variables. # We have to ensure that the .bc version of the decompiled .ll file is placed @@ -1175,12 +1178,12 @@ def decompile(self): # race-condition problems when the same input .ll file is decompiled in # parallel processes because they would overwrite each other's .bc file. This # is most likely to happen in regression tests in the 'll' mode. - out_backend = self.output + '.backend' + out_backend = self.output_file + '.backend' - # If the input file is the same as $OUT_BACKEND_LL below, then we have to change the name of - # $OUT_BACKEND. Otherwise, the input file would get overwritten during the conversion. + # If the input file is the same as out_backend_ll below, then we have to change the name of + # out_backend. Otherwise, the input file would get overwritten during the conversion. if self.out_frontend_ll == out_backend + '.ll': - out_backend = self.output + '.backend.backend' + out_backend = self.output_file + '.backend.backend' self.out_backend_bc = out_backend + '.bc' self.out_backend_ll = out_backend + '.ll' @@ -1194,10 +1197,10 @@ def decompile(self): # Prevent bin2llvmir from removing unreachable functions. bin2llvmir_params.remove('-unreachable-funcs') - if self.config == '' and self.args.config_db: - self.config = self.args.config_db + if self.config_file == '' and self.args.config_db: + self.config_file = self.args.config_db - bin2llvmir_params.extend(['-config-path', self.config]) + bin2llvmir_params.extend(['-config-path', self.config_file]) if self.args.max_memory: bin2llvmir_params.extend(['-max-memory', self.args.max_memory]) @@ -1207,7 +1210,7 @@ def decompile(self): bin2llvmir_params.append('-max-memory-half-ram') print() - print('##### Decompiling ' + self.input + ' into ' + self.out_backend_bc + '...') + print('##### Decompiling ' + self.input_file + ' into ' + self.out_backend_bc + '...') print('RUN: ' + config.BIN2LLVMIR + ' ' + ' '.join(bin2llvmir_params) + ' -o ' + self.out_backend_bc) bin2llvmir_rc = 0 @@ -1259,16 +1262,16 @@ def thread2(): # modes 'bin' || 'raw' # LL mode goes straight to backend. - if self.args.mode == 'll': - self.out_backend_bc = self.input - self.config = self.args.config_db + if self.mode == 'll': + self.out_backend_bc = self.input_file + self.config_file = self.args.config_db # Create parameters for the $LLVMIR2HLL call. 
llvmir2hll_params = ['-target-hll=' + self.args.hll, '-var-renamer=' + self.args.backend_var_renamer, '-var-name-gen=fruit', '-var-name-gen-prefix=', '-call-info-obtainer=' + self.args.backend_call_info_obtainer, '-arithm-expr-evaluator=' + self.args.backend_arithm_expr_evaluator, '-validate-module', - '-llvmir2bir-converter=' + self.args.backend_llvmir2bir_converter, '-o', self.output, + '-llvmir2bir-converter=' + self.args.backend_llvmir2bir_converter, '-o', self.output_file, self.out_backend_bc] if not self.args.backend_no_debug: @@ -1277,8 +1280,8 @@ def thread2(): if not self.args.backend_no_debug_comments: llvmir2hll_params.append('-emit-debug-comments') - if self.config: - llvmir2hll_params.append('-config-path=' + self.config) + if self.config_file: + llvmir2hll_params.append('-config-path=' + self.config_file) if self.args.backend_semantics: llvmir2hll_params.extend(['-semantics', self.args.backend_semantics]) @@ -1340,7 +1343,7 @@ def thread2(): # Decompile the optimized IR code. print() - print('##### Decompiling ' + self.out_backend_bc + ' into ' + self.output + '...') + print('##### Decompiling ' + self.out_backend_bc + ' into ' + self.output_file + '...') print('RUN: ' + config.LLVMIR2HLL + ' ' + ' '.join(llvmir2hll_params)) llvmir2hll_rc = 0 @@ -1399,17 +1402,17 @@ def thread4(): if self.args.backend_emit_cg and self.args.backend_cg_conversion == 'auto': if Utils.tool_exists('dot'): - print('RUN: dot -T' + self.args.graph_format + ' ' + self.output + '.cg.dot > ' + self.output + '.cg.' - + self.args.graph_format) + print('RUN: dot -T' + self.args.graph_format + ' ' + self.output_file + '.cg.dot > ' + self.output_file + + '.cg.' + self.args.graph_format) - cmd.run_cmd(['dot', '-T' + self.args.graph_format, self.output + '.cg.dot'], - stdout=open(self.output + '.cg.' + self.args.graph_format, 'wb')) + cmd.run_cmd(['dot', '-T' + self.args.graph_format, self.output_file + '.cg.dot'], + stdout=open(self.output_file + '.cg.' + self.args.graph_format, 'wb')) else: print('Please install \'Graphviz\' to generate graphics.') if self.args.backend_emit_cfg and self.args.backend_cfg_conversion == 'auto': if Utils.tool_exists('dot'): - for cfg in glob.glob(self.output + '.cfg.*.dot'): + for cfg in glob.glob(self.output_file + '.cfg.*.dot'): print('RUN: dot -T' + self.args.graph_format + ' ' + cfg + ' > ' + ( os.path.splitext(cfg)[0] + '.' + self.args.graph_format)) @@ -1421,15 +1424,15 @@ def thread4(): # Remove trailing whitespace and the last redundant empty new line from the # generated output (if any). It is difficult to do this in the back-end, so we # do it here. - with open(self.output, 'r') as file: + with open(self.output_file, 'r') as file: new = [line.rstrip() for line in file] - with open(self.output, 'w') as fh: + with open(self.output_file, 'w') as fh: [fh.write('%s\n' % line) for line in new] # Colorize output file. if self.args.color_for_ida: - cmd.run_cmd([config.IDA_COLORIZER, self.output, self.config]) + cmd.run_cmd([config.IDA_COLORIZER, self.output_file, self.config_file]) # Store the information about the decompilation into the JSON file. 
if self.args.generate_log: @@ -1444,7 +1447,5 @@ def thread4(): if __name__ == '__main__': - args = parse_args() - - decompiler = Decompiler(args) + decompiler = Decompiler(sys.argv) sys.exit(decompiler.decompile()) diff --git a/scripts/retdec_signature_from_library_creator.py b/scripts/retdec_signature_from_library_creator.py index 7b2dfd772..62749015b 100644 --- a/scripts/retdec_signature_from_library_creator.py +++ b/scripts/retdec_signature_from_library_creator.py @@ -81,7 +81,7 @@ def _check_arguments(self): return False self.file_path = self.args.output - dir_name = os.path.dirname(Utils.get_realpath(self.file_path)) + dir_name = os.path.dirname(os.path.abspath(self.file_path)) self.tmp_dir_path = os.path.join(dir_name, 'XXXXXXXXX') if self.args.ignore_nops: @@ -172,7 +172,5 @@ def run(self): if __name__ == '__main__': - args = parse_args(sys.argv) - - sig = SigFromLib(args) + sig = SigFromLib(sys.argv) sys.exit(sig.run()) diff --git a/scripts/retdec_tests_runner.py b/scripts/retdec_tests_runner.py index 0c8512b45..1dbbc1aee 100644 --- a/scripts/retdec_tests_runner.py +++ b/scripts/retdec_tests_runner.py @@ -49,7 +49,7 @@ def unit_tests_in_dir(path): for file in os.listdir(path): file_name = os.path.basename(file) - if file_name.startswith('retdec-tests-'): + if file_name.startswith('retdec-tests-') and not file.endswith('.sh'): tests.append(file) tests.sort() diff --git a/scripts/retdec_unpacker.py b/scripts/retdec_unpacker.py index 045805a0e..02c5beff8 100644 --- a/scripts/retdec_unpacker.py +++ b/scripts/retdec_unpacker.py @@ -207,8 +207,6 @@ def unpack_all(self): if __name__ == '__main__': - args = parse_args(sys.argv) - - unpacker = Unpacker(args) + unpacker = Unpacker(sys.argv) _, rc = unpacker.unpack_all() sys.exit(rc) From 8a89ad503896322e5409fa9cdd5a955a088a3573 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20K=C3=B6sel?= Date: Tue, 19 Jun 2018 14:25:43 +0200 Subject: [PATCH 34/48] [skip ci] First version of reimplementing logging --- scripts/retdec_decompiler.py | 299 +++++++++++------------------------ scripts/retdec_unpacker.py | 4 +- scripts/retdec_utils.py | 48 +++--- 3 files changed, 115 insertions(+), 236 deletions(-) diff --git a/scripts/retdec_decompiler.py b/scripts/retdec_decompiler.py index 4663ca0b9..80e372459 100755 --- a/scripts/retdec_decompiler.py +++ b/scripts/retdec_decompiler.py @@ -9,6 +9,7 @@ import re import shutil import sys +import tempfile import time import retdec_config as config @@ -71,6 +72,7 @@ def parse_args(_args): parser.add_argument('--generate-log', dest='generate_log', + action='store_true', help='Generate log') parser.add_argument('--ar-index', @@ -312,6 +314,7 @@ def __init__(self, _args): self.signatures_to_remove = [] self.arch = '' self.mode = '' + self.format = '' self.pdb_file = '' self.out_unpacked = '' @@ -323,6 +326,27 @@ def __init__(self, _args): self.out_archive = '' self.tool_log_file = '' + self.log_decompilation_start_date = '' + + self.log_fileinfo_rc = 0 + self.log_fileinfo_time = 0 + self.LOG_FILEINFO_MEMORY = 0 + self.log_fileinfo_output = '' + self.log_fileinfo_memory = 0 + + self.log_unpacker_output = '' + self.log_unpacker_rc = 0 + + self.log_bin2llvmir_rc = 0 + self.log_bin2llvmir_time = 0 + self.log_bin2llvmir_memory = 0 + self.log_bin2llvmir_output = '' + + self.log_llvmir2hll_rc = 0 + self.log_llvmir2hll_time = 0 + self.log_llvmir2hll_memory = 0 + self.log_llvmir2hll_output = '' + def check_arguments(self): """Check proper combination of input arguments. 
""" @@ -561,115 +585,37 @@ def cleanup(self): Utils.remove_dir_forced(self.tool_log_file) def generate_log(self): - """ - LOG_FILE = self.output + '.decompilation.log' - LOG_DECOMPILATION_END_DATE = time.strftime('%S') - - LOG_FILEINFO_OUTPUT = self.json_escape(LOG_FILEINFO_OUTPUT) - LOG_UNPACKER_OUTPUT = self.json_escape(LOG_UNPACKER_OUTPUT) - LOG_BIN2LLVMIR_OUTPUT = self.remove_colors(LOG_BIN2LLVMIR_OUTPUT) - LOG_BIN2LLVMIR_OUTPUT = self.json_escape(LOG_BIN2LLVMIR_OUTPUT) - LOG_LLVMIR2HLL_OUTPUT = self.remove_colors(LOG_LLVMIR2HLL_OUTPUT) - LOG_LLVMIR2HLL_OUTPUT = self.json_escape(LOG_LLVMIR2HLL_OUTPUT) - - log_structure = '{\n\t\'input_file\' : \'%s\',\n\t\'pdb_file\' : \'%s\',\n\t\'start_date\' :' \ - ' \'%s\',\n\t\'end_date\' : \'%s\',\n\t\'mode\' : \'%s\',\n\t\'arch\' : \'%s\',\n\t\'format\'' \ - ' : \'%s\',\n\t\'fileinfo_rc\' : \'%s\',\n\t\'unpacker_rc\' : \'%s\',\n\t\'bin2llvmir_rc\'' \ - ' : \'%s\',\n\t\'llvmir2hll_rc\' : \'%s\',\n\t\'fileinfo_output\' :' \ - ' \'%s\',\n\t\'unpacker_output\' : \'%s\',\n\t\'bin2llvmir_output\' :' \ - ' \'%s\',\n\t\'llvmir2hll_output\' : \'%s\',\n\t\'fileinfo_runtime\' :' \ - ' \'%s\',\n\t\'bin2llvmir_runtime\' : \'%s\',\n\t\'llvmir2hll_runtime\' :' \ - ' \'%s\',\n\t\'fileinfo_memory\' : \'%s\',\n\t\'bin2llvmir_memory\' :' \ - ' \'%s\',\n\t\'llvmir2hll_memory\' : \'%s\'\n}\n' - - print(log_structure % ( - self.input, self.pdb_file, LOG_DECOMPILATION_START_DATE, LOG_DECOMPILATION_END_DATE, self.mode, - self.args.arch, - FORMAT, LOG_FILEINFO_RC, LOG_UNPACKER_RC, LOG_BIN2LLVMIR_RC, LOG_LLVMIR2HLL_RC, - LOG_FILEINFO_OUTPUT, LOG_UNPACKER_OUTPUT, LOG_BIN2LLVMIR_OUTPUT, LOG_LLVMIR2HLL_OUTPUT, - LOG_FILEINFO_RUNTIME, LOG_BIN2LLVMIR_RUNTIME, LOG_LLVMIR2HLL_RUNTIME, LOG_FILEINFO_MEMORY, - LOG_BIN2LLVMIR_MEMORY, LOG_LLVMIR2HLL_MEMORY)) - """ - - # - # Parses the given return code and output from a tool that was run through - # `/usr/bin/time -v` and prints the return code to be stored into the log. - # - # Parameters: - # - # - $1: return code from `/usr/bin/time` - # - $2: combined output from the tool and `/usr/bin/time -v` - # - # This function has to be called for every tool that is run through - # `/usr/bin/time`. The reason is that when a tool is run without - # `/usr/bin/time` and it e.g. segfaults, shell returns 139, but when it is run - # through `/usr/bin/time`, it returns 11 (139 - 128). If this is the case, this - # function prints 139 instead of 11 to make the return codes of all tools - # consistent. - # - def get_tool_rc(self, return_code, output): - global BASH_REMATCH - global RC - - orig_rc = return_code - signal_regex = 'Command terminated by signal ([0-9]*)' - - if re.search(signal_regex, output): - signal_num = BASH_REMATCH[1] - RC = signal_num + 128 - else: - RC = orig_rc - # We want to be able to distinguish assertions and memory-insufficiency - # errors. The problem is that both assertions and memory-insufficiency - # errors make the program exit with return code 134. We solve this by - # replacing 134 with 135 (SIBGUS, 7) when there is 'std::bad_alloc' in the - # output. So, 134 will mean abort (assertion error) and 135 will mean - # memory-insufficiency error. - if RC == 134 or re.search('std::bad_alloc', output): - RC = 135 - print(RC) - - return RC - - # - # Parses the given output ($1) from a tool that was run through - # `/usr/bin/time -v` and prints the memory usage in MB. - # - def get_tool_memory_usage(self, tool): - """The output from `/usr/bin/time -v` looks like this: - - [..] 
(output from the tool) - Command being timed: 'tool' - [..] (other data) - Maximum resident set size (kbytes): 1808 - [..] (other data) - - We want the value of 'resident set size' (RSS), which we convert from KB - to MB. If the resulting value is less than 1 MB, round it to 1 MB. - """ - _, _, tail = tool.partition('Maximum resident set size (kbytes): ') - rss_kb = tail.split(' ')[0] - rss_mb = (rss_kb / 1024) - - return rss_mb if (rss_mb > 0) else 1 - - # - # Prints an escaped version of the given text so it can be inserted into JSON. - # - # Parameters: - # - $1 Text to be escaped. - # - def json_escape(self, text): - # We need to escape backslashes (\), double quotes ('), and replace new lines with '\n'. - return re.escape(text) - - def remove_colors(self, text): - """Removes color codes from the given text ($1). - """ - # _rc0 = subprocess.Popen('sed' + ' ' + '-r' + ' ' + 's/\x1b[^m]*m//g', shell=True, stdin=subprocess.PIPE) - - res = re.compile(r's/\x1b[^m]*m//g') - return res.sub('', text) + log_file = self.output_file + '.decompilation.log' + log_decompilation_end_date = str(int(time.time())) + + self.log_fileinfo_output = self.json_escape(self.log_fileinfo_output) + self.log_unpacker_output = self.json_escape(self.log_unpacker_output) + self.log_bin2llvmir_output = self.json_escape(self.log_bin2llvmir_output) + self.log_llvmir2hll_output = self.json_escape(self.log_llvmir2hll_output) + + log_structure = '{\n\t\"input_file\" : \"%s\",\n\t\"pdb_file\" : \"%s\",\n\t\"start_date\" : \"%s\",\n\t\"' \ + 'end_date\" : \"%s\",\n\t\"mode\" : \"%s\",\n\t\"arch\" : \"%s\",\n\t\"format\" : \"%s\",\n\t\"' \ + 'fileinfo_rc\" : \"%s\",\n\t\"unpacker_rc\" : \"%s\",\n\t\"bin2llvmir_rc\" : \"%s\",\n\t\"' \ + 'llvmir2hll_rc\" : \"%s\",\n\t\"fileinfo_output\" : \"%s\",\n\t\"unpacker_output\" : \"%s\",' \ + '\n\t\"bin2llvmir_output\" : \"%s\",\n\t\"llvmir2hll_output\" : \"%s\",\n\t\"fileinfo_runtime\"' \ + ' : \"%s\",\n\t\"bin2llvmir_runtime\" : \"%s\",\n\t\"llvmir2hll_runtime\" : \"%s\",\n\t\"' \ + 'fileinfo_memory\" : \"%s\",\n\t\"bin2llvmir_memory\" : \"%s\",\n\t\"llvmir2hll_memory\"' \ + ' : \"%s\"\n}\n' + + json_string = log_structure % ( + self.input_file, self.pdb_file, self.log_decompilation_start_date, log_decompilation_end_date, self.mode, + self.arch, self.format, self.log_fileinfo_rc, self.log_unpacker_rc, self.log_bin2llvmir_rc, + self.log_llvmir2hll_rc, self.log_fileinfo_output, self.log_unpacker_output, self.log_bin2llvmir_output, + self.log_llvmir2hll_output, self.log_fileinfo_time, self.log_bin2llvmir_time, self.log_llvmir2hll_time, + self.log_fileinfo_memory, self.log_bin2llvmir_memory, self.log_llvmir2hll_memory) + + with open(log_file, 'w+') as f: + f.write(json_string) + # print(json_string, file=open(log_file, 'w+')) + + def json_escape(self, string): + # TODO + return string.rstrip('\r\n').replace('\n', r'\n') def string_to_md5(self, string): """Generate a MD5 checksum from a given string. @@ -688,13 +634,14 @@ def decompile(self): # Initialize variables used by logging. if self.args.generate_log: - log_decompilation_start_date = time.strftime('%s') # os.popen('date + %s').read().rstrip('\n') + self.log_decompilation_start_date = str(int(time.time())) # Put the tool log file and tmp file into /tmp because it uses tmpfs. This means that # the data are stored in RAM instead on the disk, which should provide faster access. 
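The hand-assembled log_structure string in generate_log above, together with the stubbed json_escape, emits invalid JSON as soon as a tool's output contains a quote or a backslash. A minimal sketch of an alternative that collects the fields in a dict and lets the standard json module do all escaping; the helper name write_decompilation_log is hypothetical and not part of RetDec:

    import json

    def write_decompilation_log(path, fields):
        """Serialize the collected log fields to `path` as JSON.

        json.dump escapes quotes, backslashes and newlines itself, so no
        manual json_escape step is needed.
        """
        with open(path, 'w') as f:
            json.dump(fields, f, indent=4)

    # e.g. from generate_log():
    #   write_decompilation_log(log_file, {
    #       'input_file': self.input_file,
    #       'start_date': self.log_decompilation_start_date,
    #       'fileinfo_rc': self.log_fileinfo_rc,
    #       'fileinfo_output': self.log_fileinfo_output,
    #   })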
- tmp_dir = '/tmp/decompiler_log' + tmp_dir = os.path.join(tempfile.gettempdir(), 'decompiler_log') os.makedirs(tmp_dir, exist_ok=True) - file_md5 = self.string_to_md5(self.output_file) - tool_log_file = tmp_dir + '/' + file_md5 + '.tool' + with open(self.output_file, 'r', encoding='utf-8') as f: + file_md5 = '123456' # self.string_to_md5(f.read()) + self.tool_log_file = os.path.join(tmp_dir, file_md5 + '.tool') # Raw. if self.mode == 'raw': @@ -893,15 +840,10 @@ def decompile(self): fileinfo_rc = 0 if self.args.generate_log: - """ - tcmd = TimeMeasuredProcess() - LOG_FILEINFO_OUTPUT, fileinfo_rc, LOG_FILEINFO_RUNTIME = \ - tcmd.run_cmd([config.FILEINFO] + fileinfo_params) - - LOG_FILEINFO_MEMORY = self.get_tool_memory_usage(LOG_FILEINFO_OUTPUT) - print(LOG_FILEINFO_OUTPUT) - """ - pass + self.log_fileinfo_memory, self.log_fileinfo_time, self.log_fileinfo_output, self.log_fileinfo_rc = \ + cmd.run_measured_cmd([config.FILEINFO, *fileinfo_params]) + + print(self.log_fileinfo_output) else: fileinfo, fileinfo_rc, _ = cmd.run_cmd([config.FILEINFO, *fileinfo_params]) print(fileinfo) @@ -933,8 +875,9 @@ def decompile(self): unpacker = Unpacker(unpack_params) if self.args.generate_log: # we should get the output from the unpacker tool - log_unpacker_output, unpacker_rc = unpacker.unpack_all() - LOG_UNPACKER_RC = unpacker_rc + self.log_unpacker_output, self.log_unpacker_rc = unpacker.unpack_all() + + unpacker_rc = self.log_unpacker_rc else: _, unpacker_rc = unpacker.unpack_all() @@ -973,24 +916,14 @@ def decompile(self): print('RUN: ' + config.FILEINFO + ' ' + ' '.join(fileinfo_params)) if self.args.generate_log: - """ - FILEINFO_AND_TIME_OUTPUT = os.popen( - TIME + ' \'' + config.FILEINFO + '\' \'' + ' '.join(fileinfo_params) + '\' 2>&1').read().rstrip( - '\n') - - fileinfo_rc = 0 # _rc0 - - tcmd = TimeMeasuredProcess() - LOG_FILEINFO_OUTPUT, fileinfo_rc, LOG_FILEINFO_RUNTIME = \ - tcmd.run_cmd([config.FILEINFO] + fileinfo_params) - - LOG_FILEINFO_RUNTIME = (LOG_FILEINFO_RUNTIME + FILEINFO_RUNTIME) - FILEINFO_MEMORY = self.get_tool_memory_usage(FILEINFO_AND_TIME_OUTPUT) - LOG_FILEINFO_MEMORY = (LOG_FILEINFO_MEMORY + FILEINFO_MEMORY) / 2 - LOG_FILEINFO_OUTPUT = self.get_tool_output(FILEINFO_AND_TIME_OUTPUT) - print(LOG_FILEINFO_OUTPUT) - """ - pass + fileinfo_memory, fileinfo_time, self.log_fileinfo_output, self.log_fileinfo_rc \ + = cmd.run_measured_cmd([config.FILEINFO, *fileinfo_params]) + + fileinfo_rc = self.log_fileinfo_rc + self.log_fileinfo_time += fileinfo_time + self.log_fileinfo_memory = (self.log_fileinfo_memory + fileinfo_memory) / 2 + + print(self.log_fileinfo_output) else: fileinfo, fileinfo_rc, _ = cmd.run_cmd([config.FILEINFO, *fileinfo_params]) print(fileinfo) @@ -1018,17 +951,17 @@ def decompile(self): self.arch = arch_full.strip() # Get object file format. 
- fileformat, _, _ = cmd.run_cmd([config.CONFIGTOOL, self.config_file, '--read', '--format']) - fileformat = fileformat.lower() + self.format, _, _ = cmd.run_cmd([config.CONFIGTOOL, self.config_file, '--read', '--format']) + self.format = self.format.lower() # Intel HEX needs architecture to be specified - if fileformat in ['ihex']: + if self.format in ['ihex']: if not self.arch or self.arch == 'unknown': - Utils.print_error('Option -a|--arch must be used with format ' + fileformat) + Utils.print_error('Option -a|--arch must be used with format ' + self.format) return 1 if not self.args.endian: - Utils.print_error('Option -e|--endian must be used with format ' + fileformat) + Utils.print_error('Option -e|--endian must be used with format ' + self.format) return 1 cmd.run_cmd([config.CONFIGTOOL, self.config_file, '--write', '--arch', self.arch]) @@ -1072,7 +1005,7 @@ def decompile(self): # # TODO: Using ELF for IHEX is ok, but for raw, we probably should somehow decide between ELF and PE, # or use both, for RAW. - sig_format = fileformat + sig_format = self.format if sig_format in ['ihex', 'raw']: sig_format = 'elf' @@ -1213,37 +1146,13 @@ def decompile(self): print('##### Decompiling ' + self.input_file + ' into ' + self.out_backend_bc + '...') print('RUN: ' + config.BIN2LLVMIR + ' ' + ' '.join(bin2llvmir_params) + ' -o ' + self.out_backend_bc) - bin2llvmir_rc = 0 - if self.args.generate_log: - """ - PID = 0 - bin2llvmir_rc = 0 - - def thread1(): - subprocess.call([TIME, config.BIN2LLVMIR, ' '.join(bin2llvmir_params), '-o', - self.out_backend_bc], shell=True, stdout=open(tool_log_file, 'wb'), stderr=subprocess.STDOUT) - - threading.Thread(target=thread1).start() - - PID = 0 # TODO $! Expand.exclamation() - - def thread2(): - self.timed_kill(PID) + self.log_bin2llvmir_memory, self.log_bin2llvmir_time, self.log_bin2llvmir_output, \ + self.log_bin2llvmir_rc = cmd.run_measured_cmd([config.BIN2LLVMIR, *bin2llvmir_params, '-o', + self.out_backend_bc], out=open(self.tool_log_file, 'w')) - threading.Thread(target=thread2).start() - - # subprocess.call(['wait', PID], shell=True, stderr=subprocess.STDOUT, stdout=subprocess.DEVNULL) - os.kill(PID, 0) - - bin2llvmir_rc = 0 # TODO use rc _rc2 - BIN2LLVMIR_AND_TIME_OUTPUT = os.popen('cat \'' + tool_log_file + '\'').read().rstrip('\n') - LOG_BIN2LLVMIR_RC = self.get_tool_rc(bin2llvmir_rc, BIN2LLVMIR_AND_TIME_OUTPUT) - LOG_BIN2LLVMIR_RUNTIME = self.get_tool_runtime(BIN2LLVMIR_AND_TIME_OUTPUT) - LOG_BIN2LLVMIR_MEMORY = self.get_tool_memory_usage(BIN2LLVMIR_AND_TIME_OUTPUT) - LOG_BIN2LLVMIR_OUTPUT = self.get_tool_output(BIN2LLVMIR_AND_TIME_OUTPUT) - print(LOG_BIN2LLVMIR_OUTPUT, end='') - """ + bin2llvmir_rc = self.log_bin2llvmir_rc + print(self.log_bin2llvmir_output) else: bin22llvmir_out, bin2llvmir_rc, _ = cmd.run_cmd([config.BIN2LLVMIR, *bin2llvmir_params, '-o', self.out_backend_bc]) @@ -1346,39 +1255,13 @@ def thread2(): print('##### Decompiling ' + self.out_backend_bc + ' into ' + self.output_file + '...') print('RUN: ' + config.LLVMIR2HLL + ' ' + ' '.join(llvmir2hll_params)) - llvmir2hll_rc = 0 - if self.args.generate_log: - """ - PID = 0 + self.log_llvmir2hll_memory, self.log_llvmir2hll_time, self.log_llvmir2hll_output, \ + self.log_llvmir2hll_rc = cmd.run_measured_cmd([config.LLVMIR2HLL, *llvmir2hll_params], + out=open(self.tool_log_file, 'a')) - def thread3(): - subprocess.call([TIME, config.LLVMIR2HLL] + llvmir2hll_params, shell=True, stdout=open( - tool_log_file, 'wb'), stderr=subprocess.STDOUT) - - threading.Thread(target=thread3).start() - - 
PID = 0 # TODO Expand.exclamation() - - def thread4(): - self.timed_kill(PID) - - threading.Thread(target=self.thread4).start() - - os.kill(PID, 0) - # subprocess.call(['wait', PID], shell=True, stderr=subprocess.STDOUT, stdout=subprocess.DEVNULL) - - llvmir2hll_rc = 0 # use rc _rc4 - LLVMIR2HLL_AND_TIME_OUTPUT = os.popen('cat \'' + tool_log_file + '\'').read().rstrip('\n') - LOG_LLVMIR2HLL_RC = self.get_tool_rc(llvmir2hll_rc, LLVMIR2HLL_AND_TIME_OUTPUT) - LOG_LLVMIR2HLL_RUNTIME = self.get_tool_runtime(LLVMIR2HLL_AND_TIME_OUTPUT) - LOG_LLVMIR2HLL_MEMORY = self.get_tool_memory_usage(LLVMIR2HLL_AND_TIME_OUTPUT) - LOG_LLVMIR2HLL_OUTPUT = self.get_tool_output(LLVMIR2HLL_AND_TIME_OUTPUT) - - print(LOG_LLVMIR2HLL_OUTPUT) - # Wait a bit to ensure that all the memory that has been assigned to the tool was released. - time.sleep(0.1) - """ + llvmir2hll_rc = self.log_llvmir2hll_rc + print(self.log_llvmir2hll_output) else: llvmir2hll_out, llvmir2hll_rc, _ = cmd.run_cmd([config.LLVMIR2HLL, *llvmir2hll_params]) print(llvmir2hll_out) diff --git a/scripts/retdec_unpacker.py b/scripts/retdec_unpacker.py index 02c5beff8..f026b2600 100644 --- a/scripts/retdec_unpacker.py +++ b/scripts/retdec_unpacker.py @@ -192,7 +192,7 @@ def unpack_all(self): if return_code == self.RET_UNPACK_OK or return_code == self.RET_UNPACKER_NOTHING_TO_DO_OTHERS_OK \ or return_code == self.RET_UNPACKER_FAILED_OTHERS_OK: res_rc = return_code - res_out += output + res_out = output shutil.move(tmp_output, self.output) self.input = self.output @@ -203,7 +203,7 @@ def unpack_all(self): os.remove(tmp_output) break - return res_out, return_code if res_rc == -1 else res_rc + return (res_out, return_code) if res_rc == -1 else ('', res_rc) if __name__ == '__main__': diff --git a/scripts/retdec_utils.py b/scripts/retdec_utils.py index 65275c2ae..a49380ac5 100644 --- a/scripts/retdec_utils.py +++ b/scripts/retdec_utils.py @@ -9,7 +9,7 @@ import signal import subprocess import sys -from timeit import Timer +import time import retdec_config as config @@ -57,6 +57,25 @@ def run_cmd(self, cmd, input=b'', timeout=None, input_encoding='utf-8', If the timeout expires before the command finishes, the value of `output` is the command's output generated up to the timeout. """ + _, output, return_code, timeouted = self._run_cmd(cmd, input, timeout, input_encoding, output_encoding, + strip_shell_colors, stdout) + + return output, return_code, timeouted + + def run_measured_cmd(self, args, out=subprocess.STDOUT): + cmd = CmdRunner() + + start = time.time() + p, output, rc, timeouted = cmd._run_cmd(args, stdout=out) + elapsed = time.time() - start + + # TODO get memory usage from process + memory = 0 + + return memory, elapsed, output, rc + + def _run_cmd(self, cmd, input=b'', timeout=None, input_encoding='utf-8', + output_encoding='utf-8', strip_shell_colors=True, stdout=subprocess.STDOUT): def decode(output): if output_encoding is not None: @@ -74,13 +93,13 @@ def decode(output): try: p = self.start(cmd) output, _ = p.communicate(input, timeout) - return decode(output).rstrip(), p.returncode, False + return p, decode(output).rstrip(), p.returncode, False except subprocess.TimeoutExpired: # Kill the process, along with all its child processes. p.kill() # Finish the communication to obtain the output. 
output, _ = p.communicate() - return decode(output).rstrip(), p.returncode, True + return p, decode(output).rstrip(), p.returncode, True def start(self, cmd, discard_output=False, stdout=subprocess.STDOUT): """Starts the given command and returns a handler to it. @@ -161,29 +180,6 @@ def kill(self): subprocess.call(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) -class TimeMeasuredProcess: - - def __init__(self): - self.output = '' - self.rc = 0 - - def run_cmd(self, args): - """ - - :param args: - :return: (output, return_code, time) - """ - - def runProcess(): - cmd = CmdRunner() - - self.output, self.rc, _ = cmd.run_cmd(args) - - t = Timer(runProcess) - - return self.output, self.rc, t.timeit(1) - - class Utils: @staticmethod From 6a804407cc0dc34256408e9e32164941c6911313 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20K=C3=B6sel?= Date: Tue, 19 Jun 2018 16:29:22 +0200 Subject: [PATCH 35/48] [skip ci] Some fixes and cleanup --- scripts/retdec_decompiler.py | 65 +++++++------------ .../retdec_signature_from_library_creator.py | 6 +- 2 files changed, 25 insertions(+), 46 deletions(-) diff --git a/scripts/retdec_decompiler.py b/scripts/retdec_decompiler.py index 80e372459..905df1a04 100755 --- a/scripts/retdec_decompiler.py +++ b/scripts/retdec_decompiler.py @@ -4,12 +4,9 @@ import argparse import glob -import hashlib import os -import re import shutil import sys -import tempfile import time import retdec_config as config @@ -18,7 +15,7 @@ from retdec_utils import Utils, CmdRunner -def parse_args(_args): +def parse_args(args): parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.ArgumentDefaultsHelpFormatter) @@ -101,6 +98,7 @@ def parse_args(_args): parser.add_argument('--backend-cfg-test', dest='backend_cfg_test', + action='store_true', help='Unifies the labels of all nodes in the emitted CFG (this has to be used in tests).') parser.add_argument('--backend-disabled-opts', @@ -110,10 +108,12 @@ def parse_args(_args): parser.add_argument('--backend-emit-cfg', dest='backend_emit_cfg', + action='store_true', help='Emits a CFG for each function in the backend IR (in the .dot format).') parser.add_argument('--backend-emit-cg', dest='backend_emit_cg', + action='store_true', help='Emits a CG for the decompiled module in the backend IR (in the .dot format).') parser.add_argument('--backend-cg-conversion', @@ -143,10 +143,12 @@ def parse_args(_args): parser.add_argument('--backend-keep-all-brackets', dest='backend_keep_all_brackets', + action='store_true', help='Keeps all brackets in the generated code.') parser.add_argument('--backend-keep-library-funcs', dest='backend_keep_library_funcs', + action='store_true', help='Keep functions from standard libraries.') parser.add_argument('--backend-llvmir2bir-converter', @@ -157,6 +159,7 @@ def parse_args(_args): parser.add_argument('--backend-no-compound-operators', dest='backend_no_compound_operators', + action='store_true', help='Do not emit compound operators (like +=) instead of assignments.') parser.add_argument('--backend-no-debug', @@ -298,13 +301,12 @@ def parse_args(_args): help='Disables the default memory limit (half of system RAM) of fileinfo, ' 'unpacker, bin2llvmir, and llvmir2hll.') - return parser.parse_args() + return parser.parse_args(args) class Decompiler: - def __init__(self, _args): - self.args = parse_args(_args) - self.timeout = 300 + def __init__(self, args): + self.args = parse_args(args) self.input_file = '' self.output_file = '' @@ -324,13 +326,10 @@ def __init__(self, _args): 
self.out_backend_ll = '' self.out_restored = '' self.out_archive = '' - self.tool_log_file = '' self.log_decompilation_start_date = '' - self.log_fileinfo_rc = 0 self.log_fileinfo_time = 0 - self.LOG_FILEINFO_MEMORY = 0 self.log_fileinfo_output = '' self.log_fileinfo_memory = 0 @@ -349,7 +348,7 @@ def __init__(self, _args): def check_arguments(self): """Check proper combination of input arguments. - """ + """ # Check whether the input file was specified. if self.args.input: @@ -571,19 +570,16 @@ def cleanup(self): Utils.remove_dir_forced(self.out_backend_bc) Utils.remove_dir_forced(self.out_backend_ll) - Utils.remove_dir_forced(self.out_restored) # Archive support + Utils.remove_dir_forced(self.out_restored) + # Archive support (Macho-O Universal) Utils.remove_dir_forced(self.out_archive) - # Archive support (Macho-O Universal) + # Signatures generated from archives for sig in self.signatures_to_remove: Utils.remove_dir_forced(sig) - # Signatures generated from archives - if self.tool_log_file: - Utils.remove_dir_forced(self.tool_log_file) - def generate_log(self): log_file = self.output_file + '.decompilation.log' log_decompilation_end_date = str(int(time.time())) @@ -611,20 +607,11 @@ def generate_log(self): with open(log_file, 'w+') as f: f.write(json_string) - # print(json_string, file=open(log_file, 'w+')) def json_escape(self, string): # TODO return string.rstrip('\r\n').replace('\n', r'\n') - def string_to_md5(self, string): - """Generate a MD5 checksum from a given string. - """ - m = hashlib.md5() - m.update(string) - - return m.hexdigest() - def decompile(self): cmd = CmdRunner() @@ -635,13 +622,6 @@ def decompile(self): # Initialize variables used by logging. if self.args.generate_log: self.log_decompilation_start_date = str(int(time.time())) - # Put the tool log file and tmp file into /tmp because it uses tmpfs. This means that - # the data are stored in RAM instead on the disk, which should provide faster access. - tmp_dir = os.path.join(tempfile.gettempdir(), 'decompiler_log') - os.makedirs(tmp_dir, exist_ok=True) - with open(self.output_file, 'r', encoding='utf-8') as f: - file_md5 = '123456' # self.string_to_md5(f.read()) - self.tool_log_file = os.path.join(tmp_dir, file_md5 + '.tool') # Raw. if self.mode == 'raw': @@ -1149,7 +1129,7 @@ def decompile(self): if self.args.generate_log: self.log_bin2llvmir_memory, self.log_bin2llvmir_time, self.log_bin2llvmir_output, \ self.log_bin2llvmir_rc = cmd.run_measured_cmd([config.BIN2LLVMIR, *bin2llvmir_params, '-o', - self.out_backend_bc], out=open(self.tool_log_file, 'w')) + self.out_backend_bc]) bin2llvmir_rc = self.log_bin2llvmir_rc print(self.log_bin2llvmir_output) @@ -1257,8 +1237,7 @@ def decompile(self): if self.args.generate_log: self.log_llvmir2hll_memory, self.log_llvmir2hll_time, self.log_llvmir2hll_output, \ - self.log_llvmir2hll_rc = cmd.run_measured_cmd([config.LLVMIR2HLL, *llvmir2hll_params], - out=open(self.tool_log_file, 'a')) + self.log_llvmir2hll_rc = cmd.run_measured_cmd([config.LLVMIR2HLL, *llvmir2hll_params]) llvmir2hll_rc = self.log_llvmir2hll_rc print(self.log_llvmir2hll_output) @@ -1285,22 +1264,22 @@ def decompile(self): if self.args.backend_emit_cg and self.args.backend_cg_conversion == 'auto': if Utils.tool_exists('dot'): - print('RUN: dot -T' + self.args.graph_format + ' ' + self.output_file + '.cg.dot > ' + self.output_file + print('RUN: dot -T' + self.args.graph_format + ' ' + self.output_file + '.cg.dot -o ' + self.output_file + '.cg.' 
+ self.args.graph_format) - cmd.run_cmd(['dot', '-T' + self.args.graph_format, self.output_file + '.cg.dot'], - stdout=open(self.output_file + '.cg.' + self.args.graph_format, 'wb')) + cmd.run_cmd(['dot', '-T' + self.args.graph_format, self.output_file + '.cg.dot', '-o', + self.output_file + '.cg.' + self.args.graph_format]) else: print('Please install \'Graphviz\' to generate graphics.') if self.args.backend_emit_cfg and self.args.backend_cfg_conversion == 'auto': if Utils.tool_exists('dot'): for cfg in glob.glob(self.output_file + '.cfg.*.dot'): - print('RUN: dot -T' + self.args.graph_format + ' ' + cfg + ' > ' + ( + print('RUN: dot -T' + self.args.graph_format + ' ' + cfg + ' -o ' + ( os.path.splitext(cfg)[0] + '.' + self.args.graph_format)) - cmd.run_cmd(['dot', '-T' + self.args.graph_format, cfg], - stdout=open((os.path.splitext(cfg)[0]) + '.' + self.args.graph_format, 'wb')) + cmd.run_cmd(['dot', '-T' + self.args.graph_format, cfg, '-o', + os.path.splitext(cfg)[0] + '.' + self.args.graph_format]) else: print('Please install \'Graphviz\' to generate graphics.') diff --git a/scripts/retdec_signature_from_library_creator.py b/scripts/retdec_signature_from_library_creator.py index 62749015b..057268b0c 100644 --- a/scripts/retdec_signature_from_library_creator.py +++ b/scripts/retdec_signature_from_library_creator.py @@ -104,12 +104,12 @@ def run(self): continue # Get library name for .pat file. - lib_name = Path(lib_path).resolve().stem + lib_name = os.path.splitext(lib_path)[0] # Create sub-directory for object files. object_dir = os.path.join(self.tmp_dir_path, lib_name) + '-objects' object_dirs = [object_dir] - os.makedirs(object_dir) + os.makedirs(object_dir, exist_ok=True) # Extract all files to temporary folder. subprocess.call([config.AR, lib_path, '--extract', '--output', object_dir], shell=True) @@ -126,7 +126,7 @@ def run(self): # Extract patterns from library. 
pattern_file = os.path.join(self.tmp_dir_path, lib_name) + '.pat' pattern_files = [pattern_file] - result = subprocess.call([config.BIN2PAT, '-o', pattern_file] + objects, shell=True) + result = subprocess.call([config.BIN2PAT, '-o', pattern_file, *objects], shell=True) if result != 0: self.print_error_and_cleanup('utility bin2pat failed when processing %s' % lib_path) From e00919331826f4498cc41bf808178a60fd5fe7d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20K=C3=B6sel?= Date: Tue, 19 Jun 2018 17:18:58 +0200 Subject: [PATCH 36/48] [skip ci] Print memory usage, print output from unpacker, match code convention and some other fixes --- scripts/retdec_archive_decompiler.py | 6 +- scripts/retdec_decompiler.py | 86 +++++++++---------- .../retdec_signature_from_library_creator.py | 6 +- scripts/retdec_unpacker.py | 27 +++--- scripts/retdec_utils.py | 22 +++-- 5 files changed, 78 insertions(+), 69 deletions(-) diff --git a/scripts/retdec_archive_decompiler.py b/scripts/retdec_archive_decompiler.py index 0c3f82021..39b9717d4 100644 --- a/scripts/retdec_archive_decompiler.py +++ b/scripts/retdec_archive_decompiler.py @@ -11,7 +11,7 @@ from retdec_utils import CmdRunner -def parse_args(_args): +def parse_args(args): parser = argparse.ArgumentParser(description='Runs the decompilation script with the given optional arguments over' ' all files in the given static library or prints list of files in' ' plain text with --plain argument or in JSON format with' @@ -40,7 +40,7 @@ def parse_args(_args): dest="arg_list", help="args passed to the decompiler") - return parser.parse_args(_args) + return parser.parse_args(args) class ArchiveDecompiler: @@ -193,5 +193,5 @@ def decompile_archive(self): if __name__ == '__main__': - archive_decompiler = ArchiveDecompiler(sys.argv) + archive_decompiler = ArchiveDecompiler(sys.argv[1:]) sys.exit(archive_decompiler.decompile_archive()) diff --git a/scripts/retdec_decompiler.py b/scripts/retdec_decompiler.py index 905df1a04..f7593b091 100755 --- a/scripts/retdec_decompiler.py +++ b/scripts/retdec_decompiler.py @@ -346,7 +346,7 @@ def __init__(self, args): self.log_llvmir2hll_memory = 0 self.log_llvmir2hll_output = '' - def check_arguments(self): + def _check_arguments(self): """Check proper combination of input arguments. """ @@ -529,7 +529,7 @@ def check_arguments(self): return True - def print_warning_if_decompiling_bytecode(self): + def _print_warning_if_decompiling_bytecode(self): """Prints a warning if we are decompiling bytecode.""" cmd = CmdRunner() @@ -539,7 +539,7 @@ def print_warning_if_decompiling_bytecode(self): Utils.print_warning('Detected %s bytecode, which cannot be decompiled by our machine-code decompiler.' ' The decompilation result may be inaccurate.' % bytecode) - def check_whether_decompilation_should_be_forcefully_stopped(self, tool_name): + def _check_whether_decompilation_should_be_forcefully_stopped(self, tool_name): """Checks whether the decompilation should be forcefully stopped because of the --stop-after parameter. If so, cleanup is run and the script exits with 0. Arguments: @@ -549,15 +549,15 @@ def check_whether_decompilation_should_be_forcefully_stopped(self, tool_name): if self.args.stop_after == tool_name: if self.args.generate_log: - self.generate_log() + self._generate_log() - self.cleanup() + self._cleanup() print() print('#### Forced stop due to - -stop - after %s...' 
% self.args.stop_after) return True return False - def cleanup(self): + def _cleanup(self): """Cleanup working directory""" if self.args.cleanup: @@ -580,14 +580,14 @@ def cleanup(self): for sig in self.signatures_to_remove: Utils.remove_dir_forced(sig) - def generate_log(self): + def _generate_log(self): log_file = self.output_file + '.decompilation.log' log_decompilation_end_date = str(int(time.time())) - self.log_fileinfo_output = self.json_escape(self.log_fileinfo_output) - self.log_unpacker_output = self.json_escape(self.log_unpacker_output) - self.log_bin2llvmir_output = self.json_escape(self.log_bin2llvmir_output) - self.log_llvmir2hll_output = self.json_escape(self.log_llvmir2hll_output) + self.log_fileinfo_output = self._json_escape(self.log_fileinfo_output) + self.log_unpacker_output = self._json_escape(self.log_unpacker_output) + self.log_bin2llvmir_output = self._json_escape(self.log_bin2llvmir_output) + self.log_llvmir2hll_output = self._json_escape(self.log_llvmir2hll_output) log_structure = '{\n\t\"input_file\" : \"%s\",\n\t\"pdb_file\" : \"%s\",\n\t\"start_date\" : \"%s\",\n\t\"' \ 'end_date\" : \"%s\",\n\t\"mode\" : \"%s\",\n\t\"arch\" : \"%s\",\n\t\"format\" : \"%s\",\n\t\"' \ @@ -608,7 +608,7 @@ def generate_log(self): with open(log_file, 'w+') as f: f.write(json_string) - def json_escape(self, string): + def _json_escape(self, string): # TODO return string.rstrip('\r\n').replace('\n', r'\n') @@ -616,7 +616,7 @@ def decompile(self): cmd = CmdRunner() # Check arguments and set default values for unset options. - if not self.check_arguments(): + if not self._check_arguments(): return 1 # Initialize variables used by logging. @@ -653,7 +653,7 @@ def decompile(self): print('Invalid --arch option \'' + self.args.arch + '\'. File contains these architecture families:') cmd.run_cmd([config.EXTRACT, '--list', self.input_file]) - self.cleanup() + self._cleanup() return 1 else: # Pick best architecture @@ -673,20 +673,20 @@ def decompile(self): # Check for thin signature. if Utils.has_thin_archive_signature(self.input_file): - self.cleanup() + self._cleanup() Utils.print_error('File is a thin archive and cannot be decompiled.') return 1 # Check if our tools can handle it. if not Utils.is_valid_archive(self.input_file): - self.cleanup() + self._cleanup() Utils.print_error('The input archive has invalid format.') return 1 # Get and check number of objects. arch_object_count = Utils.archive_object_count(self.input_file) if arch_object_count <= 0: - self.cleanup() + self._cleanup() Utils.print_error('The input archive is empty.') return 1 @@ -701,7 +701,7 @@ def decompile(self): + out_restored) if not Utils.archive_get_by_index(self.input_file, self.args.ar_index, out_restored): - self.cleanup() + self._cleanup() valid_index = (arch_object_count - 1) if valid_index != 0: @@ -723,7 +723,7 @@ def decompile(self): + out_restored) if not Utils.archive_get_by_name(self.input_file, self.args.ar_name, out_restored): - self.cleanup() + self._cleanup() Utils.print_error('File named %s was not found in the input archive.' % self.args.ar_name) return 1 @@ -734,7 +734,7 @@ def decompile(self): print('or \' --ar-name=string\' option. 
Archive contains these files:') Utils.archive_list_numbered_content(self.input_file) - self.cleanup() + self._cleanup() return 1 else: if self.args.ar_name: @@ -830,14 +830,14 @@ def decompile(self): if fileinfo_rc != 0: if self.args.generate_log: - self.generate_log() + self._generate_log() - self.cleanup() + self._cleanup() # The error message has been already reported by fileinfo in stderr. Utils.print_error('') return 1 - if self.check_whether_decompilation_should_be_forcefully_stopped('fileinfo'): + if self._check_whether_decompilation_should_be_forcefully_stopped('fileinfo'): return 0 # @@ -861,7 +861,7 @@ def decompile(self): else: _, unpacker_rc = unpacker.unpack_all() - if self.check_whether_decompilation_should_be_forcefully_stopped('unpacker'): + if self._check_whether_decompilation_should_be_forcefully_stopped('unpacker'): return 0 # RET_UNPACK_OK=0 @@ -910,14 +910,14 @@ def decompile(self): if fileinfo_rc != 0: if self.args.generate_log: - self.generate_log() + self._generate_log() - self.cleanup() + self._cleanup() # The error message has been already reported by fileinfo in stderr. Utils.print_error('') return 1 - self.print_warning_if_decompiling_bytecode() + self._print_warning_if_decompiling_bytecode() # Check whether the architecture was specified. if self.arch: @@ -960,9 +960,9 @@ def decompile(self): else: # nothing if self.args.generate_log: - self.generate_log() + self._generate_log() - self.cleanup() + self._cleanup() Utils.print_error('Unsupported target architecture %s. Supported architectures: ' 'Intel x86, ARM, ARM + Thumb, MIPS, PIC32, PowerPC.' % self.arch) return 1 @@ -973,9 +973,9 @@ def decompile(self): if fileclass not in ['16', '32']: if self.args.generate_log: - self.generate_log() + self._generate_log() - self.cleanup() + self._cleanup() Utils.print_error( 'Unsupported target format \'%s%s\'. Supported formats: ELF32, PE32, Intel HEX 32, Mach-O 32.' % ( format, fileclass)) @@ -998,9 +998,9 @@ def decompile(self): sig_endian = 'be' else: if self.args.generate_log: - self.generate_log() + self._generate_log() - self.cleanup() + self._cleanup() Utils.print_error('Cannot determine endiannesss.') return 1 @@ -1011,7 +1011,7 @@ def decompile(self): signatures_dir = os.path.join(config.GENERIC_SIGNATURES_DIR, sig_format, fileclass, sig_endian, sig_arch) - self.print_warning_if_decompiling_bytecode() + self._print_warning_if_decompiling_bytecode() # Decompile unreachable functions. if self.args.keep_unreachable_funcs: @@ -1140,13 +1140,13 @@ def decompile(self): if bin2llvmir_rc != 0: if self.args.generate_log: - self.generate_log() + self._generate_log() - self.cleanup() + self._cleanup() Utils.print_error('Decompilation to LLVM IR failed') return 1 - if self.check_whether_decompilation_should_be_forcefully_stopped('bin2llvmir'): + if self._check_whether_decompilation_should_be_forcefully_stopped('bin2llvmir'): return 0 # modes 'bin' || 'raw' @@ -1247,13 +1247,13 @@ def decompile(self): if llvmir2hll_rc != 0: if self.args.generate_log: - self.generate_log() + self._generate_log() - self.cleanup() + self._cleanup() Utils.print_error('Decompilation of file %s failed' % self.out_backend_bc) return 1 - if self.check_whether_decompilation_should_be_forcefully_stopped('llvmir2hll'): + if self._check_whether_decompilation_should_be_forcefully_stopped('llvmir2hll'): return 0 # Convert .dot graphs to desired format. @@ -1298,10 +1298,10 @@ def decompile(self): # Store the information about the decompilation into the JSON file. 
if self.args.generate_log: - self.generate_log() + self._generate_log() # Success! - self.cleanup() + self._cleanup() print() print('##### Done!') @@ -1309,5 +1309,5 @@ def decompile(self): if __name__ == '__main__': - decompiler = Decompiler(sys.argv) + decompiler = Decompiler(sys.argv[1:]) sys.exit(decompiler.decompile()) diff --git a/scripts/retdec_signature_from_library_creator.py b/scripts/retdec_signature_from_library_creator.py index 057268b0c..06fd120ab 100644 --- a/scripts/retdec_signature_from_library_creator.py +++ b/scripts/retdec_signature_from_library_creator.py @@ -14,7 +14,7 @@ from retdec_utils import Utils -def parse_args(_args): +def parse_args(args): parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.ArgumentDefaultsHelpFormatter) @@ -53,7 +53,7 @@ def parse_args(_args): action='store_true', help='Stop after bin2pat.') - return parser.parse_args(_args) + return parser.parse_args(args) class SigFromLib: @@ -172,5 +172,5 @@ def run(self): if __name__ == '__main__': - sig = SigFromLib(sys.argv) + sig = SigFromLib(sys.argv[1:]) sys.exit(sig.run()) diff --git a/scripts/retdec_unpacker.py b/scripts/retdec_unpacker.py index f026b2600..f7437db35 100644 --- a/scripts/retdec_unpacker.py +++ b/scripts/retdec_unpacker.py @@ -132,11 +132,12 @@ def _unpack(self, output): print('RUN: ' + config.UNPACKER + ' '.join(unpacker_params)) cmd = CmdRunner() - out, unpacker_rc, _ = cmd.run_cmd([config.UNPACKER] + unpacker_params) + unpacker_output, unpacker_rc, _ = cmd.run_cmd([config.UNPACKER, *unpacker_params]) + print(unpacker_output) if unpacker_rc == self.UNPACKER_EXIT_CODE_OK: print('##### Unpacking by using generic unpacker: successfully unpacked') - return out, self.RET_UNPACK_OK + return unpacker_output, self.RET_UNPACK_OK elif unpacker_rc == self.UNPACKER_EXIT_CODE_NOTHING_TO_DO: print('##### Unpacking by using generic unpacker: nothing to do') else: @@ -152,17 +153,18 @@ def _unpack(self, output): print('##### Trying to unpack ' + self.input + ' into ' + output + ' by using UPX...') print('RUN: upx -d ' + self.input + ' -o ' + output) - out, upx_rc, _ = cmd.run_cmd(['upx', '-d', self.input, '-o', output]) + unpacker_output, upx_rc, _ = cmd.run_cmd(['upx', '-d', self.input, '-o', output]) + print(unpacker_output) if upx_rc == 0: print('##### Unpacking by using UPX: successfully unpacked') if self.args.extended_exit_codes: if unpacker_rc == self.UNPACKER_EXIT_CODE_NOTHING_TO_DO: - return out, self.RET_UNPACKER_NOTHING_TO_DO_OTHERS_OK + return unpacker_output, self.RET_UNPACKER_NOTHING_TO_DO_OTHERS_OK elif unpacker_rc >= self.UNPACKER_EXIT_CODE_UNPACKING_FAILED: - return out, self.RET_UNPACKER_FAILED_OTHERS_OK + return unpacker_output, self.RET_UNPACKER_FAILED_OTHERS_OK else: - return out, self.RET_UNPACK_OK + return unpacker_output, self.RET_UNPACK_OK else: # We cannot distinguish whether upx failed or the input file was # not upx-packed @@ -173,9 +175,9 @@ def _unpack(self, output): # Do not return -> try the next unpacker # Return. if unpacker_rc >= self.UNPACKER_EXIT_CODE_UNPACKING_FAILED: - return out, self.RET_UNPACKER_FAILED + return unpacker_output, self.RET_UNPACKER_FAILED else: - return out, self.RET_NOTHING_TO_DO + return unpacker_output, self.RET_NOTHING_TO_DO def unpack_all(self): # Check arguments and set default values for unset options. 
@@ -187,12 +189,13 @@ def unpack_all(self): tmp_output = self.output + '.tmp' while True: - output, return_code = self._unpack(tmp_output) + unpacker_out, return_code = self._unpack(tmp_output) + + res_out += unpacker_out + '\n' if return_code == self.RET_UNPACK_OK or return_code == self.RET_UNPACKER_NOTHING_TO_DO_OTHERS_OK \ or return_code == self.RET_UNPACKER_FAILED_OTHERS_OK: res_rc = return_code - res_out = output shutil.move(tmp_output, self.output) self.input = self.output @@ -203,10 +206,10 @@ def unpack_all(self): os.remove(tmp_output) break - return (res_out, return_code) if res_rc == -1 else ('', res_rc) + return (res_out, return_code) if res_rc == -1 else (res_out, res_rc) if __name__ == '__main__': - unpacker = Unpacker(sys.argv) + unpacker = Unpacker(sys.argv[1:]) _, rc = unpacker.unpack_all() sys.exit(rc) diff --git a/scripts/retdec_utils.py b/scripts/retdec_utils.py index a49380ac5..765a232b5 100644 --- a/scripts/retdec_utils.py +++ b/scripts/retdec_utils.py @@ -58,7 +58,7 @@ def run_cmd(self, cmd, input=b'', timeout=None, input_encoding='utf-8', is the command's output generated up to the timeout. """ _, output, return_code, timeouted = self._run_cmd(cmd, input, timeout, input_encoding, output_encoding, - strip_shell_colors, stdout) + strip_shell_colors, False, stdout) return output, return_code, timeouted @@ -66,16 +66,13 @@ def run_measured_cmd(self, args, out=subprocess.STDOUT): cmd = CmdRunner() start = time.time() - p, output, rc, timeouted = cmd._run_cmd(args, stdout=out) + memory, output, rc, timeouted = cmd._run_cmd(args, track_memory=True, stdout=out) elapsed = time.time() - start - # TODO get memory usage from process - memory = 0 - - return memory, elapsed, output, rc + return memory, int(elapsed), output, rc def _run_cmd(self, cmd, input=b'', timeout=None, input_encoding='utf-8', - output_encoding='utf-8', strip_shell_colors=True, stdout=subprocess.STDOUT): + output_encoding='utf-8', strip_shell_colors=True, track_memory=False, stdout=subprocess.STDOUT): def decode(output): if output_encoding is not None: @@ -90,10 +87,19 @@ def decode(output): if not isinstance(input, bytes): input = input.encode(input_encoding) + memory = 0 try: p = self.start(cmd) + if track_memory: + try: + import psutil + proc = psutil.Process(p.pid) + memory = int(proc.memory_info().rss / float(1 << 20)) + except ImportError: + memory = 0 + output, _ = p.communicate(input, timeout) - return p, decode(output).rstrip(), p.returncode, False + return memory, decode(output).rstrip(), p.returncode, False except subprocess.TimeoutExpired: # Kill the process, along with all its child processes. p.kill() From 7d732395415748b63fa31b3c54da9982cebc2ae9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20K=C3=B6sel?= Date: Tue, 19 Jun 2018 21:49:50 +0200 Subject: [PATCH 37/48] [skip ci] Fix crash when using cleanup option; fix crash when using color ida --- scripts/retdec_archive_decompiler.py | 2 +- scripts/retdec_decompiler.py | 22 +++++++++++----------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/scripts/retdec_archive_decompiler.py b/scripts/retdec_archive_decompiler.py index 39b9717d4..7a917a102 100644 --- a/scripts/retdec_archive_decompiler.py +++ b/scripts/retdec_archive_decompiler.py @@ -176,7 +176,7 @@ def decompile_archive(self): log_file = self.library_path + '.file_' + str(file_index) + '.log.verbose' # Do not escape! 
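The psutil call added to run_measured_cmd above reads the child's RSS once, right after the process starts, so the reported figure reflects start-up memory rather than peak consumption. A rough sketch of peak-RSS sampling, assuming psutil is available; the function name is hypothetical, output capture is omitted for brevity, and negative Popen return codes (killed by signal N) are mapped back to the shell-style 128 + N that the removed Bash-era get_tool_rc helper used to produce:

    import subprocess
    import time

    try:
        import psutil  # optional; without it the memory figure stays at 0
    except ImportError:
        psutil = None

    def run_and_measure_peak_rss(cmd_args, poll_interval=0.05):
        """Run `cmd_args`; return (peak_rss_mb, elapsed_seconds, return_code).

        Output capture is left out here; stdout and stderr go to /dev/null.
        """
        start = time.time()
        p = subprocess.Popen(cmd_args, stdout=subprocess.DEVNULL,
                             stderr=subprocess.DEVNULL)

        proc = None
        if psutil is not None:
            try:
                proc = psutil.Process(p.pid)
            except psutil.NoSuchProcess:
                proc = None

        peak = 0
        while p.poll() is None:  # sample RSS until the child exits
            if proc is not None:
                try:
                    peak = max(peak, proc.memory_info().rss)
                except psutil.NoSuchProcess:
                    break
            time.sleep(poll_interval)

        rc = p.wait()
        if rc < 0:
            # Popen reports "killed by signal N" as -N; mirror the shell
            # convention of 128 + N (e.g. SIGSEGV -> 139).
            rc = 128 - rc

        return peak // (1 << 20), int(time.time() - start), rc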
- output, _, timeouted = cmd.run_cmd([config.DECOMPILER, '--ar-index=' + str(i), '-o', + output, _, timeouted = cmd.run_cmd([sys.executable, config.DECOMPILER, '--ar-index=' + str(i), '-o', self.library_path + '.file_' + str(file_index) + '.c', self.library_path, *self.decompiler_sh_args], timeout=self.timeout) diff --git a/scripts/retdec_decompiler.py b/scripts/retdec_decompiler.py index f7593b091..684c3aed1 100755 --- a/scripts/retdec_decompiler.py +++ b/scripts/retdec_decompiler.py @@ -561,24 +561,24 @@ def _cleanup(self): """Cleanup working directory""" if self.args.cleanup: - Utils.remove_dir_forced(self.out_unpacked) - Utils.remove_dir_forced(self.out_frontend_ll) - Utils.remove_dir_forced(self.out_frontend_bc) + Utils.remove_file_forced(self.out_unpacked) + Utils.remove_file_forced(self.out_frontend_ll) + Utils.remove_file_forced(self.out_frontend_bc) if self.config_file != self.args.config_db: - Utils.remove_dir_forced(self.config_file) + Utils.remove_file_forced(self.config_file) - Utils.remove_dir_forced(self.out_backend_bc) - Utils.remove_dir_forced(self.out_backend_ll) + Utils.remove_file_forced(self.out_backend_bc) + Utils.remove_file_forced(self.out_backend_ll) # Archive support - Utils.remove_dir_forced(self.out_restored) + Utils.remove_file_forced(self.out_restored) # Archive support (Macho-O Universal) - Utils.remove_dir_forced(self.out_archive) + Utils.remove_file_forced(self.out_archive) # Signatures generated from archives for sig in self.signatures_to_remove: - Utils.remove_dir_forced(sig) + Utils.remove_file_forced(sig) def _generate_log(self): log_file = self.output_file + '.decompilation.log' @@ -1110,7 +1110,7 @@ def decompile(self): # Prevent bin2llvmir from removing unreachable functions. bin2llvmir_params.remove('-unreachable-funcs') - if self.config_file == '' and self.args.config_db: + if self.config_file == '' or not self.config_file and self.args.config_db: self.config_file = self.args.config_db bin2llvmir_params.extend(['-config-path', self.config_file]) @@ -1294,7 +1294,7 @@ def decompile(self): # Colorize output file. if self.args.color_for_ida: - cmd.run_cmd([config.IDA_COLORIZER, self.output_file, self.config_file]) + cmd.run_cmd([sys.executable, config.IDA_COLORIZER, self.output_file, self.config_file]) # Store the information about the decompilation into the JSON file. 
if self.args.generate_log: From f15127df100c2eb9267db114cd9171e111cfd93c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20K=C3=B6sel?= Date: Wed, 20 Jun 2018 15:01:22 +0200 Subject: [PATCH 38/48] [skip ci] Fix --backend-aggressive-opts argument --- scripts/retdec_decompiler.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/retdec_decompiler.py b/scripts/retdec_decompiler.py index 684c3aed1..4966f0da0 100755 --- a/scripts/retdec_decompiler.py +++ b/scripts/retdec_decompiler.py @@ -84,12 +84,13 @@ def parse_args(args): parser.add_argument('--backend-aggressive-opts', dest='backend_aggressive_opts', + action='store_true', help='Enables aggressive optimizations.') parser.add_argument('--backend-arithm-expr-evaluator', dest='backend_arithm_expr_evaluator', default='c', - help='Name of the used evaluator of arithmetical expressions .') + help='Name of the used evaluator of arithmetical expressions.') parser.add_argument('--backend-call-info-obtainer', dest='backend_call_info_obtainer', From 60e8080de0703db639356c3cfbab0bb6b68b91d6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20K=C3=B6sel?= Date: Wed, 20 Jun 2018 22:17:07 +0200 Subject: [PATCH 39/48] [skip ci] Fix error when file arch is followed by a comment --- scripts/retdec_decompiler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/retdec_decompiler.py b/scripts/retdec_decompiler.py index 4966f0da0..966f47088 100755 --- a/scripts/retdec_decompiler.py +++ b/scripts/retdec_decompiler.py @@ -929,7 +929,7 @@ def decompile(self): arch_full = arch_full.lower() # Strip comments in parentheses and all trailing whitespace - self.arch = arch_full.strip() + self.arch = arch_full.split(' ')[0] # Get object file format. self.format, _, _ = cmd.run_cmd([config.CONFIGTOOL, self.config_file, '--read', '--format']) From 449c85f6f9b05b1184cb471fa6649323785641f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20K=C3=B6sel?= Date: Thu, 21 Jun 2018 00:01:39 +0200 Subject: [PATCH 40/48] [skip ci] Match Bash script more closely --- scripts/retdec_decompiler.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/scripts/retdec_decompiler.py b/scripts/retdec_decompiler.py index 966f47088..73358c99a 100755 --- a/scripts/retdec_decompiler.py +++ b/scripts/retdec_decompiler.py @@ -52,7 +52,6 @@ def parse_args(args): parser.add_argument('-m', '--mode', dest='mode', metavar='MODE', - default='bin', choices=['bin', 'll', 'raw'], help='Force the type of decompilation mode [bin|ll|raw]' '(default: ll if input\'s suffix is \'.ll\', bin otherwise).') @@ -1028,9 +1027,8 @@ def decompile(self): for lib in self.args.static_code_archive: print('Extracting signatures from file \'%s\'', lib) - # TODO replace command - crop_arch_path, _, _ = cmd.run_cmd( - 'basename \'' + lib + '\' | LC_ALL=C sed -e \'s/[^A-Za-z0-9_.-]/_/g\'') + # TODO replace command: LC_ALL=C sed -e 's/[^A-Za-z0-9_.-]/_/g' + crop_arch_path = os.path.basename(lib) sig_out = self.output_file + '.' + crop_arch_path + '.' + lib_index + '.yara' # Call sig from lib tool @@ -1149,6 +1147,7 @@ def decompile(self): if self._check_whether_decompilation_should_be_forcefully_stopped('bin2llvmir'): return 0 + # modes 'bin' || 'raw' # LL mode goes straight to backend. @@ -1156,7 +1155,7 @@ def decompile(self): self.out_backend_bc = self.input_file self.config_file = self.args.config_db - # Create parameters for the $LLVMIR2HLL call. + # Create parameters for the llvmir2hll call. 
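The crop_arch_path assignment above keeps a TODO about the original LC_ALL=C sed -e 's/[^A-Za-z0-9_.-]/_/g' filter, which a bare basename() call does not reproduce. A small sketch of a pure-Python equivalent; the helper name is hypothetical:

    import os
    import re

    def sanitize_archive_name(path):
        # Mirror the sed call: keep only [A-Za-z0-9_.-], replace everything else with '_'.
        return re.sub(r'[^A-Za-z0-9_.-]', '_', os.path.basename(path))

    # e.g. crop_arch_path = sanitize_archive_name(lib)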
llvmir2hll_params = ['-target-hll=' + self.args.hll, '-var-renamer=' + self.args.backend_var_renamer, '-var-name-gen=fruit', '-var-name-gen-prefix=', '-call-info-obtainer=' + self.args.backend_call_info_obtainer, From 54158720b2f207b3ab7fb37b3f3ddbcb1757eae8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20K=C3=B6sel?= Date: Thu, 21 Jun 2018 10:21:28 +0200 Subject: [PATCH 41/48] [skip ci] Fix a few comments --- scripts/retdec_decompiler.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/scripts/retdec_decompiler.py b/scripts/retdec_decompiler.py index 73358c99a..4018c8620 100755 --- a/scripts/retdec_decompiler.py +++ b/scripts/retdec_decompiler.py @@ -419,8 +419,8 @@ def _check_arguments(self): if self.args.config_db: if not os.access(self.args.config_db, os.R_OK): - Utils.print_error( - 'The input JSON configuration file \'%s\' does not exist or is not readable' % self.args.config_db) + Utils.print_error('The input JSON configuration file \'%s\' does not exist or is not readable' + % self.args.config_db) return False if self.args.pdb: @@ -544,7 +544,7 @@ def _check_whether_decompilation_should_be_forcefully_stopped(self, tool_name): --stop-after parameter. If so, cleanup is run and the script exits with 0. Arguments: tool_name Name of the tool. - The function expects the $STOP_AFTER variable to be set. + The function expects the self.args.stop_after variable to be set. """ if self.args.stop_after == tool_name: @@ -696,7 +696,7 @@ def decompile(self): # Pick object by index. if self.args.ar_index: print() - print('##### Restoring object file on index '' + (self.args.ar_index) + '' from archive...') + print('##### Restoring object file on index \'%s\' from archive...' % self.args.ar_index) print('RUN: ' + config.AR + ' ' + self.input_file + ' --index ' + self.args.ar_index + ' --output ' + out_restored) @@ -718,7 +718,7 @@ def decompile(self): # Pick object by name elif self.args.ar_name: print() - print('##### Restoring object file with name '' + (self.args.ar_name) + '' from archive...') + print('##### Restoring object file with name \'%s\' from archive...' % self.args.ar_name) print('RUN: ' + config.AR + ' ' + self.input_file + ' --name ' + self.args.ar_name + ' --output ' + out_restored) From 9009c6c3787a8987a05bf9736072107b76b073be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20K=C3=B6sel?= Date: Thu, 21 Jun 2018 16:09:56 +0200 Subject: [PATCH 42/48] [skip ci] Add some comments --- scripts/retdec_decompiler.py | 2 +- scripts/retdec_utils.py | 44 ++++++++++++++++-------------------- 2 files changed, 21 insertions(+), 25 deletions(-) diff --git a/scripts/retdec_decompiler.py b/scripts/retdec_decompiler.py index 4018c8620..4f0277623 100755 --- a/scripts/retdec_decompiler.py +++ b/scripts/retdec_decompiler.py @@ -553,7 +553,7 @@ def _check_whether_decompilation_should_be_forcefully_stopped(self, tool_name): self._cleanup() print() - print('#### Forced stop due to - -stop - after %s...' % self.args.stop_after) + print('#### Forced stop due to \'--stop-after %s\'...' % self.args.stop_after) return True return False diff --git a/scripts/retdec_utils.py b/scripts/retdec_utils.py index 765a232b5..4ea4bd8b4 100644 --- a/scripts/retdec_utils.py +++ b/scripts/retdec_utils.py @@ -1,9 +1,8 @@ -#! /usr/bin/env python3 +#!/usr/bin/env python3 """Compilation and decompilation utility functions. 
""" import os -import pathlib import re import shutil import signal @@ -21,7 +20,7 @@ class CmdRunner: """A runner of external commands.""" def run_cmd(self, cmd, input=b'', timeout=None, input_encoding='utf-8', - output_encoding='utf-8', strip_shell_colors=True, stdout=subprocess.STDOUT): + output_encoding='utf-8', strip_shell_colors=True): """Runs the given command (synchronously). :param list cmd: Command to be run as a list of arguments (strings). @@ -58,21 +57,26 @@ def run_cmd(self, cmd, input=b'', timeout=None, input_encoding='utf-8', is the command's output generated up to the timeout. """ _, output, return_code, timeouted = self._run_cmd(cmd, input, timeout, input_encoding, output_encoding, - strip_shell_colors, False, stdout) + strip_shell_colors, False) return output, return_code, timeouted - def run_measured_cmd(self, args, out=subprocess.STDOUT): + def run_measured_cmd(self, command): + """Runs the given command (synchronously) and measure its time and memory. + :param list command: Command to be run as a list of arguments (strings). + + :returns: A quadruple (`memory`, `elapsed_time`, `output`, `return_code`). + """ cmd = CmdRunner() start = time.time() - memory, output, rc, timeouted = cmd._run_cmd(args, track_memory=True, stdout=out) + memory, output, rc, _ = cmd._run_cmd(command, track_memory=True) elapsed = time.time() - start return memory, int(elapsed), output, rc def _run_cmd(self, cmd, input=b'', timeout=None, input_encoding='utf-8', - output_encoding='utf-8', strip_shell_colors=True, track_memory=False, stdout=subprocess.STDOUT): + output_encoding='utf-8', strip_shell_colors=True, track_memory=False): def decode(output): if output_encoding is not None: @@ -105,7 +109,7 @@ def decode(output): p.kill() # Finish the communication to obtain the output. output, _ = p.communicate() - return p, decode(output).rstrip(), p.returncode, True + return memory, decode(output).rstrip(), p.returncode, True def start(self, cmd, discard_output=False, stdout=subprocess.STDOUT): """Starts the given command and returns a handler to it. @@ -113,6 +117,8 @@ def start(self, cmd, discard_output=False, stdout=subprocess.STDOUT): :param list cmd: Command to be run as a list of arguments (strings). :param bool discard_output: Should the output be discarded instead of being buffered so it can be obtained later? + :param int stdout: If discard_output is True, errors will be redirectected + to the stdout param. :returns: A handler to the started command (``subprocess.Popen``). @@ -214,8 +220,6 @@ def is_windows(): @staticmethod def print_error(error): """Print error message to stderr. - 1 argument is needed - Returns - 1 if number of arguments is incorrect """ print('Error: %s' % error, file=sys.stdout) @@ -273,7 +277,6 @@ def archive_object_count(path): def archive_list_content(path): """Print content of archive. 1 argument is needed - file path - Returns - 1 if number of arguments is incorrect """ cmd = CmdRunner() output, _, _ = cmd.run_cmd([config.AR, path, '--list', '--no-numbers']) @@ -283,7 +286,6 @@ def archive_list_content(path): def archive_list_numbered_content(path): """Print numbered content of archive. 1 argument is needed - file path - Returns - 1 if number of arguments is incorrect """ print('Index\tName') cmd = CmdRunner() @@ -294,7 +296,6 @@ def archive_list_numbered_content(path): def archive_list_numbered_content_json(path): """Print numbered content of archive in JSON format. 
1 argument is needed - file path - Returns - 1 if number of arguments is incorrect """ cmd = CmdRunner() output, _, _ = cmd.run_cmd([config.AR, path, '--list', '--json']) @@ -306,14 +307,11 @@ def archive_get_by_name(path, name, output): 3 arguments are needed - path to the archive - name of the file - output path - Returns - 1 if number of arguments is incorrect - - 2 if error occurred """ - if not subprocess.call([config.AR, path, '--name', name, '--output', output], - shell=True, stderr=subprocess.STDOUT, stdout=None): - return 2 + ret = subprocess.call([config.AR, path, '--name', name, '--output', output], + shell=True, stderr=subprocess.STDOUT, stdout=None) - return 1 + return ret != 2 @staticmethod def archive_get_by_index(archive, index, output): @@ -321,12 +319,10 @@ def archive_get_by_index(archive, index, output): 3 arguments are needed - path to the archive - index of the file - output path - Returns - 1 if number of arguments is incorrect - - 2 if error occurred """ - if not subprocess.call([config.AR, archive, '--index', index, '--output', output], - shell=True, stderr=subprocess.STDOUT, stdout=None): - return 2 + ret = subprocess.call([config.AR, archive, '--index', index, '--output', output], + shell=True, stderr=subprocess.STDOUT, stdout=None) + return ret != 2 @staticmethod def is_macho_archive(path): From 1a08e9ff0d2047b2bad11ade3291ddcfed039fe2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20K=C3=B6sel?= Date: Thu, 21 Jun 2018 16:11:00 +0200 Subject: [PATCH 43/48] [skip ci] Add early type_extractor/gen_cstdlib_and_linux_jsons.py and type_extractor/gen_windows_and_windrivers_jsons.py version --- .../gen_cstdlib_and_linux_jsons.py | 198 ++++++++++++++++++ .../gen_windows_and_windrivers_jsons.py | 194 +++++++++++++++++ 2 files changed, 392 insertions(+) create mode 100644 scripts/type_extractor/gen_cstdlib_and_linux_jsons.py create mode 100644 scripts/type_extractor/gen_windows_and_windrivers_jsons.py diff --git a/scripts/type_extractor/gen_cstdlib_and_linux_jsons.py b/scripts/type_extractor/gen_cstdlib_and_linux_jsons.py new file mode 100644 index 000000000..a59242a9b --- /dev/null +++ b/scripts/type_extractor/gen_cstdlib_and_linux_jsons.py @@ -0,0 +1,198 @@ +#! /usr/bin/env python3 + +"""Generator of JSON files containing C-types information for C standard library and other header files in /usr/include/ directory.""" + +import argparse +import shutil +import sys +import os +import subprocess +import glob + +# +# C standard library headers. +# +CSTDLIB_HEADERS = [ + 'assert.h', + 'complex.h', + 'ctype.h', + 'errno.h', + 'fenv.h', + 'float.h', + 'inttypes.h', + 'iso646.h', + 'limits.h', + 'locale.h', + 'math.h', + 'setjmp.h', + 'signal.h', + 'stdalign.h', + 'stdarg.h', + 'stdatomic.h', + 'stdbool.h', + 'stddef.h', + 'stdint.h', + 'stdio.h', + 'stdlib.h', + 'stdnoreturn.h', + 'string.h', + 'tgmath.h', + 'threads.h', + 'time.h', + 'uchar.h', + 'wchar.h', + 'wctype.h' +] + +# +# Files we don't want in JSONs. +# +FILES_PATTERNS_TO_FILTER_OUT=[ + 'GL/', + 'Qt.*/', + 'SDL.*/', + 'X11/', + 'alsa/', + 'c\\+\\+/', + 'dbus.*/', + 'glib.*/', + 'libdrm/', + 'libxml2/', + 'llvm.*/', + 'mirclient/', + 'php[0-9.-]*/', + 'pulse/', + 'python.*/', + 'ruby.*/', + 'wayland.*/', + 'xcb/' +] + +#SEP = '\\|' +FILES_FILTER = '|'.join(FILES_PATTERNS_TO_FILTER_OUT) +#FILES_FILTER = (FILES_FILTER:Expand.hash()SEP) + +# +# Paths. 
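A note on the filter built just above: the commented-out SEP = '\\|' line is a remnant of the Bash version, which escaped the alternation because GNU find -regex defaults to an emacs-style syntax in which a bare '|' (and unescaped parentheses) are literal characters. If the plain '|'.join(...) result is kept, asking find for POSIX extended syntax, or doing the filtering in Python, keeps the patterns meaningful. A sketch of the find invocation under that assumption, using LINUX_OUT_DIR from the Paths block that follows:

    import subprocess

    # FILES_FILTER as built above with '|'.join(FILES_PATTERNS_TO_FILTER_OUT).
    # Passing the list without shell=True lets every element reach find as its own argument.
    subprocess.call(['find', LINUX_OUT_DIR + '/',
                     '-regextype', 'posix-extended',
                     '-regex', LINUX_OUT_DIR + '/.*(' + FILES_FILTER + ').*',
                     '-delete'])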
+# +SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__)) +SCRIPT_NAME = __name__ +EXTRACTOR = os.path.join(SCRIPT_DIR, 'extract_types.py') +MERGER = os.path.join(SCRIPT_DIR, 'merge_jsons.py') +INCLUDE_DIR = '/usr/include/' +OUT_DIR = '.' +STD_LIB_OUT_DIR = os.path.join(OUT_DIR, 'gen_tmp_cstdlib') +STD_LIB_JSON = os.path.join(OUT_DIR, 'cstdlib.json') +LINUX_OUT_DIR = os.path.join(OUT_DIR, 'gen_tmp_linux') +LINUX_JSON = os.path.join(OUT_DIR, 'linux.json') +CSTDLIB_PRIORITY_OUT_DIR = os.path.join(OUT_DIR, 'gen_tmp_cstdlib_priority') +LINUX_PRIORITY_OUT_DIR = os.path.join(OUT_DIR, 'gen_tmp_linux_priority') + +def parse_args(): + parser = argparse.ArgumentParser( + description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter + ) + + parser.add_argument('-i', '--json-indent', + dest='json_indent', + default=1, + help='Set indentation in JSON files.') + + parser.add_argument('-f', '--files-filter', + dest='file_filter', + help='Pattern to ignore specific header files.') + + parser.add_argument('-n', '--no-cleanup', + dest='no_cleanup', + default=True, + action='store_true', + help='Do not remove dirs with JSONs for individual header files.') + + parser.add_argument('--cstdlib-headers', + dest='cstdlib_headers', + help='Set path to the C standard library headers with high-priority types info.') + + parser.add_argument('--linux-headers', + dest='linux_headers', + help='Set path to the Linux headers with high-priority types info.') + + return parser.parse_args() + + +args = parse_args() + +# +# Prints the given error message ($1) to stderr and exits. +# +def print_error_and_die (error) : + sys.stderr.write('Error: ' + error) + sys.exit(1) + +def remove_dir(path): + if os.path.isdir(path) and not os.path.islink(path): + shutil.rmtree(path) + elif os.path.exists(path): + os.remove(path) + +# +# Initial cleanup. +# +remove_dir(STD_LIB_OUT_DIR) +os.mkdir(STD_LIB_OUT_DIR) +remove_dir(LINUX_OUT_DIR) +os.mkdir(LINUX_OUT_DIR) +remove_dir(CSTDLIB_PRIORITY_OUT_DIR) +os.mkdir(CSTDLIB_PRIORITY_OUT_DIR) +remove_dir(LINUX_PRIORITY_OUT_DIR) +os.mkdir(LINUX_PRIORITY_OUT_DIR) + +# +# Generate JSONs for whole /usr/include path. +# Filter out unwanted headers. +# Move standard headers to other dir. +# + +if args.file_filter: + FILES_FILTER += '|' + args.file_filter + +subprocess.call([EXTRACTOR, INCLUDE_DIR, '-o', LINUX_OUT_DIR], shell = True) +FILES_FILTER = (FILES_FILTER//\//_) +subprocess.call(['find', LINUX_OUT_DIR + '/', '-regex', LINUX_OUT_DIR + '/.*\(' + FILES_FILTER + '\).*', '-delete'], shell = True) +# +# Move standard library headers to other directory. +# Edit standard header paths to look like type-extractor generated jsons. +# +for header in CSTDLIB_HEADERS: + for f in os.popen('find \'' + INCLUDE_DIR + '\' -name \'' + header + '\'').read().rip('\n'): + f = (f#INCLUDE_DIR) + f = (f////_) + f = (f/%\.h/.json) + if os.path.isfile(LINUX_OUT_DIR + '/' + f): + shutil.move(LINUX_OUT_DIR + '/' + f, STD_LIB_OUT_DIR) + +# +# Extract types info from high-priority cstdlib and linux headers if paths were given. +# +if args.cstdlib_headers: + subprocess.call([EXTRACTOR, args.cstdlib_headers, '-o', CSTDLIB_PRIORITY_OUT_DIR], shell = True) +if args.linux_headers: + subprocess.call([EXTRACTOR, args.linux_headers, '-o', LINUX_PRIORITY_OUT_DIR], shell = True) + +# +# Merging. +# Priority headers must be first. +# Cstdlib priority headers are merged to the C standard library JSON, +# Linux priority headers to the Linux JSON. 
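Several Bash parameter expansions survived the conversion verbatim in the header-moving loop above (`${f#$INCLUDE_DIR}`, `${f//\//_}`, `${f/%\.h/.json}`, the `.rip('\n')` call on the find output, and `${FILES_FILTER//\//_}` before the find call, which in Python is simply FILES_FILTER.replace('/', '_')). They translate directly to slicing, str.replace() and an endswith() check. A sketch of that loop using the constants defined earlier in this script; glob stands in for the find call, which is my choice rather than the patch's:

    import glob
    import os
    import shutil

    for header in CSTDLIB_HEADERS:
        for path in glob.glob(os.path.join(INCLUDE_DIR, '**', header), recursive=True):
            rel = path[len(INCLUDE_DIR):]                 # ${f#$INCLUDE_DIR}
            json_name = rel.replace('/', '_')             # ${f//\//_}
            if json_name.endswith('.h'):                  # ${f/%\.h/.json}
                json_name = json_name[:-len('.h')] + '.json'
            generated = os.path.join(LINUX_OUT_DIR, json_name)
            if os.path.isfile(generated):
                shutil.move(generated, STD_LIB_OUT_DIR)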
+# +subprocess.call([MERGER, CSTDLIB_PRIORITY_OUT_DIR, STD_LIB_OUT_DIR, '-o', STD_LIB_JSON, '--json-indent', args.json_indent], shell = True) +subprocess.call([MERGER, LINUX_PRIORITY_OUT_DIR, LINUX_OUT_DIR, '-o', LINUX_JSON, '--json-indent', args.json_indent], shell = True) +# +# Optional cleanup at the end. +# +if not args.no_cleanup: + remove_dir(STD_LIB_OUT_DIR) + remove_dir(LINUX_OUT_DIR) + remove_dir(args.cstdlib_headers) + remove_dir(CSTDLIB_PRIORITY_OUT_DIR) + remove_dir(args.linux_headers) diff --git a/scripts/type_extractor/gen_windows_and_windrivers_jsons.py b/scripts/type_extractor/gen_windows_and_windrivers_jsons.py new file mode 100644 index 000000000..711c4eb78 --- /dev/null +++ b/scripts/type_extractor/gen_windows_and_windrivers_jsons.py @@ -0,0 +1,194 @@ +#!/usr/bin/env python3 + +"""Generates JSON files from includes in Windows SDK and Windows Drivers Kit.""" + +import argparse +import shutil +import sys +import os +import subprocess + +# +# Paths. +# + +SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__)) +SCRIPT_NAME = __name__ +EXTRACTOR = os.path.join(SCRIPT_DIR, 'extract_types.py') +MERGER = os.path.join(SCRIPT_DIR, 'merge_jsons.py') +OUT_DIR = '.' + +# +# Windows SDK paths. +# +WIN_UCRT_OUT_DIR = os.path.join(OUT_DIR, 'windows_ucrt') +WIN_SHARED_OUT_DIR = os.path.join(OUT_DIR, 'windows_shared') +WIN_UM_OUT_DIR = os.path.join(OUT_DIR, 'windows_um') +WIN_WINRT_OUT_DIR = os.path.join(OUT_DIR, 'windows_winrt') +WIN_NETFX_OUT_DIR = os.path.join(OUT_DIR, 'windows_netfx') +WIN_OUT_JSON = os.path.join(OUT_DIR, 'windows.json') +WIN_OUT_JSON_WITH_UNUSED_TYPES = os.path.join(OUT_DIR, 'windows_all_types.json') +# +# Windows Drivers Kit paths. +# +WDK_KM_OUT_DIR = os.path.join(OUT_DIR, 'windrivers_km') +WDK_MMOS_OUT_DIR = os.path.join(OUT_DIR, 'windrivers_mmos') +WDK_SHARED_OUT_DIR = os.path.join(OUT_DIR, 'windrivers_shared') +WDK_UM_OUT_DIR = os.path.join(OUT_DIR, 'windrivers_um') +WDK_KMDF_OUT_DIR = os.path.join(OUT_DIR, 'windrivers_kmdf') +WDK_UMDF_OUT_DIR = os.path.join(OUT_DIR, 'windrivers_umdf') +WDK_OUT_JSON = os.path.join(OUT_DIR, 'windrivers.json') + + +def parse_args(): + parser = argparse.ArgumentParser( + description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter + ) + + parser.add_argument('-i', '--json-indent', + dest='json_indent', + default=1, + help='Set indentation in JSON files.') + + parser.add_argument('-n', '--no-cleanup', + dest='no_cleanup', + default=True, + help='Do not remove dirs with JSONs for individual header files.') + + parser.add_argument('--sdk', + dest='sdk', + required=True, + help='SDK dir') + + parser.add_argument('--wdk', + dest='wdk', + required=True, + help='WDK dir') + + return parser.parse_args() + + +args = parse_args() + + +# +# Prints the given error message ($1) to stderr and exits. +# +def print_error_and_die(error): + sys.stderr.write('Error: ' + error) + sys.exit(1) + + +def remove_dir(path): + if os.path.isdir(path) and not os.path.islink(path): + shutil.rmtree(path) + elif os.path.exists(path): + os.remove(path) + + +# +# Removes temporary dirs and files used to generate JSONS that are merged later. 
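The cleanup block at the end of this generator currently feeds the user-supplied --cstdlib-headers and --linux-headers paths to remove_dir() as well, and with the store_true flag defaulting to True the `if not args.no_cleanup:` branch can never run as written. If the intent is the behaviour the option text describes, and if the input header trees are meant to survive a run, a cleanup limited to the temporary output directories would look like the following sketch (an assumption on my part, not the committed code):

    # Optional cleanup at the end: only the gen_tmp_* working directories.
    if not args.no_cleanup:
        for tmp_dir in (STD_LIB_OUT_DIR, LINUX_OUT_DIR,
                        CSTDLIB_PRIORITY_OUT_DIR, LINUX_PRIORITY_OUT_DIR):
            remove_dir(tmp_dir)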
+# +def remove_tmp_dirs_and_files(): + remove_dir(WIN_UCRT_OUT_DIR) + remove_dir(WIN_SHARED_OUT_DIR) + remove_dir(WIN_UM_OUT_DIR) + remove_dir(WIN_WINRT_OUT_DIR) + remove_dir(WIN_NETFX_OUT_DIR) + remove_dir(WIN_OUT_JSON_WITH_UNUSED_TYPES) + remove_dir(WDK_KM_OUT_DIR) + remove_dir(WDK_MMOS_OUT_DIR) + remove_dir(WDK_SHARED_OUT_DIR) + remove_dir(WDK_UM_OUT_DIR) + remove_dir(WDK_KMDF_OUT_DIR) + remove_dir(WDK_UMDF_OUT_DIR) + + +WIN_UCRT_IN_DIR = os.path.join(args.sdk, '10', 'Include', '10.0.10150.0', 'ucrt') +WIN_SHARED_IN_DIR = os.path.join(args.sdk, '10', 'Include', '10.0.10240.0', 'shared') +WIN_UM_IN_DIR = os.path.join(args.sdk, '10', 'Include', '10.0.10240.0', 'um') +WIN_WINRT_IN_DIR = os.path.join(args.sdk, '10', 'Include', '10.0.10240.0', 'winrt') +WIN_NETFX_IN_DIR = os.path.join(args.sdk, 'NETFXSDK', '4.6', 'Include', 'um') +WDK_KM_IN_DIR = os.path.join(args.wdk, '10.0.10586.0', 'km') +WDK_MMOS_IN_DIR = os.path.join(args.wdk, '10.0.10586.0', 'mmos') +WDK_SHARED_IN_DIR = os.path.join(args.wdk, '10.0.10586.0', 'shared') +WDK_UM_IN_DIR = os.path.join(args.wdk, '10.0.10586.0', 'um') +WDK_KMDF_IN_DIR = os.path.join(args.wdk, 'wdf', 'kmdf') +WDK_UMDF_IN_DIR = os.path.join(args.wdk, 'wdf', 'umdf') + +# +# Initial cleanup. +# +remove_tmp_dirs_and_files() + +os.makedirs(WIN_UCRT_OUT_DIR, exist_ok=True) +os.makedirs(WIN_SHARED_OUT_DIR, exist_ok=True) +os.makedirs(WIN_UM_OUT_DIR, exist_ok=True) +os.makedirs(WIN_WINRT_OUT_DIR, exist_ok=True) +os.makedirs(WIN_NETFX_OUT_DIR, exist_ok=True) +os.makedirs(WDK_KM_OUT_DIR, exist_ok=True) +os.makedirs(WDK_MMOS_OUT_DIR, exist_ok=True) +os.makedirs(WDK_SHARED_OUT_DIR, exist_ok=True) +os.makedirs(WDK_UM_OUT_DIR, exist_ok=True) +os.makedirs(WDK_KMDF_OUT_DIR, exist_ok=True) +os.makedirs(WDK_UMDF_OUT_DIR, exist_ok=True) + +# +# Parse the includes in the given Windows SDK directory and merge the generated +# JSON files. +# +subprocess.call([sys.executable, EXTRACTOR, WIN_UCRT_IN_DIR, '-o', WIN_UCRT_OUT_DIR], shell=True) +subprocess.call([sys.executable, EXTRACTOR, WIN_SHARED_IN_DIR, '-o', WIN_SHARED_OUT_DIR], shell=True) +subprocess.call([sys.executable, EXTRACTOR, WIN_UM_IN_DIR, '-o', WIN_UM_OUT_DIR], shell=True) +subprocess.call([sys.executable, EXTRACTOR, WIN_WINRT_IN_DIR, '-o', WIN_WINRT_OUT_DIR], shell=True) +subprocess.call([sys.executable, EXTRACTOR, WIN_NETFX_IN_DIR, '-o', WIN_NETFX_OUT_DIR], shell=True) +subprocess.call([sys.executable, MERGER, WIN_SHARED_OUT_DIR, WIN_UM_OUT_DIR, WIN_UCRT_OUT_DIR, WIN_WINRT_OUT_DIR, + WIN_NETFX_OUT_DIR, '-o', WIN_OUT_JSON, '--json-indent', args.json_indent], shell=True) + +# +# Parse the includes in the given WDK directory and merge the generated +# JSON files. 
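A general note on the extraction calls in both generators: subprocess.call() is given a list together with shell=True. On POSIX that makes only the first list element the shell command; the remaining items become arguments of /bin/sh itself and never reach the extractor or merger. When the arguments are meant to be forwarded, the list is normally passed without shell=True; a sketch using the names from the SDK block above:

    import subprocess
    import sys

    # Every element of the list reaches extract_types.py as its own argument.
    subprocess.call([sys.executable, EXTRACTOR, WIN_UCRT_IN_DIR, '-o', WIN_UCRT_OUT_DIR])

If the merger calls are converted the same way, args.json_indent has to be passed as str(args.json_indent), since subprocess only accepts string (or path-like) arguments and the option defaults to the integer 1.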
+# +subprocess.call([sys.executable, EXTRACTOR, WDK_KM_IN_DIR, '-o', WDK_KM_OUT_DIR], shell=True) +subprocess.call([sys.executable, EXTRACTOR, WDK_MMOS_IN_DIR, '-o', WDK_MMOS_OUT_DIR], shell=True) +subprocess.call([sys.executable, EXTRACTOR, WDK_SHARED_IN_DIR, '-o', WDK_SHARED_OUT_DIR], shell=True) +subprocess.call([sys.executable, EXTRACTOR, WDK_UM_IN_DIR, '-o', WDK_UM_OUT_DIR], shell=True) + +for d in os.listdir(WDK_KMDF_IN_DIR): + subprocess.call([sys.executable, EXTRACTOR, os.path.join(WDK_KMDF_IN_DIR, d), '-o', WDK_KMDF_OUT_DIR], shell=True) + +for d in os.listdir(WDK_UMDF_IN_DIR): + subprocess.call([sys.executable, EXTRACTOR, os.path.join(WDK_UMDF_IN_DIR, d), '-o', WDK_UMDF_OUT_DIR], shell=True) + +subprocess.call([sys.executable, MERGER, WDK_SHARED_OUT_DIR, WDK_UM_OUT_DIR, WDK_KM_OUT_DIR, WDK_MMOS_OUT_DIR, + WDK_KMDF_OUT_DIR, WDK_UMDF_OUT_DIR, '-o', WDK_OUT_JSON, '--json-indent', args.json_indent], + shell=True) + +# +# WDK uses many types defined in Windows SDK. We need SDK JSON with all types extracted +# and merge it with WDK. SDK functions must be removed! +# +subprocess.call([sys.executable, MERGER, WIN_SHARED_OUT_DIR, WIN_UM_OUT_DIR, WIN_UCRT_OUT_DIR, WIN_WINRT_OUT_DIR, + WIN_NETFX_OUT_DIR, '-o', WIN_OUT_JSON_WITH_UNUSED_TYPES, '--json-indent', args.json_indent, + '--keep-unused-types'], shell=True) + +if args.json_indent == 0: + subprocess.call(['sed', '-i', '-e', 's/^.*\}, \'types\': \{/\{\'functions\': \{\}, \'types\': \{/', + WIN_OUT_JSON_WITH_UNUSED_TYPES], shell=True) +else: + TYPES_LINE_NUMBER = 0 # (os.popen('egrep -n \'^s*'types': {\' \''+(WIN_OUT_JSON_WITH_UNUSED_TYPES)+'\' | cut -f1 -d:').read().rip('\n')) + TYPES_LINE_NUMBER = (TYPES_LINE_NUMBER - 1) + subprocess.call(['sed', '-i', '-e', '1,' + TYPES_LINE_NUMBER + ' d', WIN_OUT_JSON_WITH_UNUSED_TYPES], shell=True) + subprocess.call(['sed', '-i', '-e', '1s/^/\{\'functions\': \{\},\n/', WIN_OUT_JSON_WITH_UNUSED_TYPES], shell=True) + +subprocess.call( + [sys.executable, MERGER, WDK_OUT_JSON, WIN_OUT_JSON_WITH_UNUSED_TYPES, '-o', WDK_OUT_JSON, '--json-indent', + args.json_indent], + shell=True) + +# +# Optional cleanup at the end. +# +if not args.no_cleanup: + remove_tmp_dirs_and_files() From 6db0e24a97901c80a0aa0f927db09609020da9ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20K=C3=B6sel?= Date: Thu, 21 Jun 2018 16:27:03 +0200 Subject: [PATCH 44/48] Try Unit tests --- scripts/retdec_tests_runner.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/retdec_tests_runner.py b/scripts/retdec_tests_runner.py index 1dbbc1aee..4a57773e5 100644 --- a/scripts/retdec_tests_runner.py +++ b/scripts/retdec_tests_runner.py @@ -42,7 +42,7 @@ def print_colored(message, color): def unit_tests_in_dir(path): """Prints paths to all unit tests in the given directory. 1 string argument is needed: - $1 path to the directory with unit tests + path-path to the directory with unit tests """ tests = [] @@ -61,7 +61,7 @@ def run_unit_tests_in_dir(path): """Runs all unit tests in the given directory. 1 string argument is needed: - $1 path to the directory with unit tests + path - path to the directory with unit tests Returns 0 if all tests passed, 1 otherwise. 
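The TYPES_LINE_NUMBER block above still carries its egrep/sed pipeline as a comment, and the surrounding sed calls inherit the shell=True issue noted earlier. Reading the comments, the intent of the indented-JSON branch is: find the line where the 'types' object starts, drop everything before it, and prepend an empty 'functions' object. That can be done on the file directly in Python. A sketch under that reading; the quoting of the JSON keys follows whatever the merger actually emits, which the remnant leaves ambiguous:

    with open(WIN_OUT_JSON_WITH_UNUSED_TYPES) as f:
        lines = f.readlines()

    # Index of the first line that opens the types object (single or double quotes).
    types_index = next(i for i, line in enumerate(lines)
                       if "'types': {" in line or '"types": {' in line)

    with open(WIN_OUT_JSON_WITH_UNUSED_TYPES, 'w') as f:
        f.write('{"functions": {},\n')       # sed '1s/^/{"functions": {},\n/'
        f.writelines(lines[types_index:])    # sed '1,TYPES_LINE_NUMBER-1 d'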
""" From 0654a515e1a21691c6a27460833ac56cad7aba59 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20K=C3=B6sel?= Date: Thu, 21 Jun 2018 20:03:57 +0200 Subject: [PATCH 45/48] Try to fix test --- scripts/retdec_tests_runner.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/scripts/retdec_tests_runner.py b/scripts/retdec_tests_runner.py index 4a57773e5..de7bc3988 100644 --- a/scripts/retdec_tests_runner.py +++ b/scripts/retdec_tests_runner.py @@ -7,6 +7,7 @@ import subprocess import retdec_config as config +from retdec_utils import CmdRunner """First argument can be verbose.""" if len(sys.argv) > 1: @@ -76,7 +77,9 @@ def run_unit_tests_in_dir(path): print() # TODO verbose support - return_code = subprocess.call([os.path.abspath(unit_test), '--gtest_color=yes'], shell=True) + cmd = CmdRunner() + output, return_code, _ = cmd.run_cmd([unit_test, '--gtest_color=yes']) + print(output) if return_code != 0: tests_failed = True From e3b80d5fbc34340a8404830a1467fc3538f0a978 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20K=C3=B6sel?= Date: Thu, 21 Jun 2018 20:36:50 +0200 Subject: [PATCH 46/48] Use absolute path instead --- scripts/retdec_tests_runner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/retdec_tests_runner.py b/scripts/retdec_tests_runner.py index de7bc3988..f4680cfaa 100644 --- a/scripts/retdec_tests_runner.py +++ b/scripts/retdec_tests_runner.py @@ -51,7 +51,7 @@ def unit_tests_in_dir(path): for file in os.listdir(path): file_name = os.path.basename(file) if file_name.startswith('retdec-tests-') and not file.endswith('.sh'): - tests.append(file) + tests.append(os.path.abspath(file)) tests.sort() From a04a067b235700db2dc62b0f105a4b17c6f53801 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20K=C3=B6sel?= Date: Tue, 3 Jul 2018 14:07:57 +0200 Subject: [PATCH 47/48] [skip ci] Add check for python scripts --- scripts/retdec_tests_runner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/retdec_tests_runner.py b/scripts/retdec_tests_runner.py index f4680cfaa..7c40e59a0 100644 --- a/scripts/retdec_tests_runner.py +++ b/scripts/retdec_tests_runner.py @@ -50,7 +50,7 @@ def unit_tests_in_dir(path): for file in os.listdir(path): file_name = os.path.basename(file) - if file_name.startswith('retdec-tests-') and not file.endswith('.sh'): + if file_name.startswith('retdec-tests-') and not file.endswith('.sh') and not file.endswith('.py'): tests.append(os.path.abspath(file)) tests.sort() From c38bf5a28c503ec06689f762f38fb9c109e2c583 Mon Sep 17 00:00:00 2001 From: Peter Matula Date: Wed, 11 Jul 2018 15:49:39 +0200 Subject: [PATCH 48/48] scripts/retdec_decompiler.py: use output if specified via -o option --- scripts/retdec_decompiler.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/retdec_decompiler.py b/scripts/retdec_decompiler.py index 4f0277623..312ec59bc 100755 --- a/scripts/retdec_decompiler.py +++ b/scripts/retdec_decompiler.py @@ -514,6 +514,8 @@ def _check_arguments(self): self.output_file = input_name[:-5] + self.args.hll else: self.output_file = self.input_file + '.' + self.args.hll + else: + self.output_file = self.args.output # If the output file name matches the input file name, we have to change the # output file name. Otherwise, the input file gets overwritten.