Skip to content

Commit

Permalink
interfaces: [refactor] uniform seeding interface for all generators (…
Browse files Browse the repository at this point in the history
…both prog and input gens)

Co-authored-by: Connor Shugg <[email protected]>
  • Loading branch information
OleksiiOleksenko and cwshugg committed Mar 30, 2023
1 parent 0c5c999 commit b03baaa
Show file tree
Hide file tree
Showing 8 changed files with 132 additions and 115 deletions.
9 changes: 5 additions & 4 deletions src/factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,13 +94,14 @@ def get_fuzzer(instruction_set, working_directory, testcase, inputs):
raise ConfigException("ERROR: unknown value of `fuzzer` configuration option")


def get_generator(instruction_set: interfaces.InstructionSetAbstract) -> interfaces.Generator:
def get_program_generator(instruction_set: interfaces.InstructionSetAbstract,
seed: int) -> interfaces.Generator:
return _get_from_config(GENERATORS, CONF.instruction_set + "-" + CONF.generator,
"instruction_set", instruction_set)
"instruction_set", instruction_set, seed)


def get_input_generator() -> interfaces.InputGenerator:
return _get_from_config(INPUT_GENERATORS, CONF.input_generator, "input_generator")
def get_input_generator(seed: int) -> interfaces.InputGenerator:
return _get_from_config(INPUT_GENERATORS, CONF.input_generator, "input_generator", seed)


def get_model(bases: Tuple[int, int]) -> interfaces.Model:
Expand Down
16 changes: 9 additions & 7 deletions src/fuzzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,9 @@ def _adjust_config(self, existing_test_case):

def initialize_modules(self):
""" create all main modules """
self.generator = factory.get_generator(self.instruction_set)
self.input_gen = factory.get_input_generator()
self.generator = factory.get_program_generator(self.instruction_set,
CONF.program_generator_seed)
self.input_gen = factory.get_input_generator(CONF.input_gen_seed)
self.executor = factory.get_executor()
self.model = factory.get_model(self.executor.read_base_addresses())
self.analyser = factory.get_analyser()
Expand Down Expand Up @@ -98,7 +99,7 @@ def start(self,
if self.input_paths:
inputs = self.input_gen.load(self.input_paths)
else:
inputs = self.input_gen.generate(CONF.input_gen_seed, num_inputs)
inputs = self.input_gen.generate(num_inputs)
STAT.num_inputs += len(inputs) * CONF.inputs_per_class

# Check if the test case is useful
Expand Down Expand Up @@ -222,8 +223,9 @@ def generate_test_batch(self, program_generator_seed: int, num_test_cases: int,
# prepare for generation
STAT.test_cases = num_test_cases
CONF.program_generator_seed = program_generator_seed
program_gen = factory.get_generator(self.instruction_set)
input_gen = factory.get_input_generator()
program_gen = factory.get_program_generator(self.instruction_set,
CONF.program_generator_seed)
input_gen = factory.get_input_generator(CONF.input_gen_seed)

# generate test cases
Path(self.work_dir).mkdir(exist_ok=True)
Expand All @@ -236,7 +238,7 @@ def generate_test_batch(self, program_generator_seed: int, num_test_cases: int,
" Use --permit-overwrite to overwrite the test case")

program_gen.create_test_case(test_case_dir + "/" + "program.asm", True)
inputs = input_gen.generate(CONF.input_gen_seed, num_inputs)
inputs = input_gen.generate(num_inputs)
for j, input_ in enumerate(inputs):
input_.save(f"{test_case_dir}/input{j}.bin")

Expand All @@ -262,7 +264,7 @@ def analyse_traces_from_files(ctrace_file: str, htrace_file: str):
assert len(ctraces) == len(htraces), \
"The number of hardware traces does not match the number of contract traces"

dummy_inputs = factory.get_input_generator().generate(1, len(ctraces))
dummy_inputs = factory.get_input_generator(0).generate(len(ctraces))

# check for violations
analyser = factory.get_analyser()
Expand Down
14 changes: 8 additions & 6 deletions src/generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,8 @@ class ConfigurableGenerator(Generator, abc.ABC):
printer: Printer # set by subclasses
target_desc: TargetDesc # set by subclasses

def __init__(self, instruction_set: InstructionSet):
super().__init__(instruction_set)
def __init__(self, instruction_set: InstructionSet, seed: int):
super().__init__(instruction_set, seed)
LOGGER.dbg_gen_instructions(instruction_set.instructions)
self.control_flow_instructions = \
[i for i in self.instruction_set.instructions if i.control_flow]
Expand All @@ -91,8 +91,9 @@ def __init__(self, instruction_set: InstructionSet):
assert self.load_instruction or self.store_instructions, \
"The instruction set does not have memory accesses while `avg_mem_accesses > 0`"

if CONF.program_generator_seed:
random.seed(CONF.program_generator_seed)
def set_seed(self, seed: int) -> None:
if seed:
random.seed(seed)

def create_test_case(self, asm_file: str, disable_assembler: bool = False) -> TestCase:
self.test_case = TestCase()
Expand Down Expand Up @@ -379,8 +380,8 @@ class RandomGenerator(ConfigurableGenerator, abc.ABC):
"""
had_recent_memory_access: bool = False

def __init__(self, instruction_set: InstructionSet):
super().__init__(instruction_set)
def __init__(self, instruction_set: InstructionSet, seed: int):
super().__init__(instruction_set, seed)
uncond_name = self.get_unconditional_jump_instruction().name.lower()
self.cond_branches = \
[i for i in self.control_flow_instructions if i.name.lower() != uncond_name]
Expand Down Expand Up @@ -554,6 +555,7 @@ def generate_cond_operand(self, spec: OperandSpec, _: Instruction) -> Operand:
return CondOperand(cond)

def add_terminators_in_function(self, func: Function):

def add_fallthrough(bb: BasicBlock, destination: BasicBlock):
# create an unconditional branch and add it
terminator = self.get_unconditional_jump_instruction()
Expand Down
145 changes: 61 additions & 84 deletions src/input_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@
import os
import random
import numpy as np
from typing import List, Tuple
from abc import abstractmethod
from typing import List
from interfaces import Input, InputTaint, InputGenerator
from config import CONF
from service import LOGGER
Expand All @@ -17,75 +18,83 @@

class InputGeneratorCommon(InputGenerator):

def load(self, input_paths: List[str]) -> List[Input]:
inputs = []
for input_path in input_paths:
input_ = Input()

# check that the file is not corrupted
size = os.path.getsize(input_path)
if size != len(input_) * 8:
LOGGER.error(f"Incorrect size of input `{input_path}` "
f"({size} B, expected {len(input_) * 8} B)")

input_.load(input_path)
inputs.append(input_)
return inputs


class LegacyRandomInputGenerator(InputGeneratorCommon):
"""
Legacy implementation. Will be deprecated in the future because of low performance.
Simple 32-bit LCG with a=2891336453 and c=54321.
"""
@abstractmethod
def _generate_one(self) -> Input:
pass

def __init__(self):
super().__init__()
self.input_mask = pow(2, (CONF.input_gen_entropy_bits % 33)) - 1

def generate(self, seed: int, count: int) -> List[Input]:
if seed == 0:
seed = random.randint(0, pow(2, 32) - 1)
LOGGER.inform("input_gen", str(seed))
def generate(self, count: int) -> List[Input]:
# if it's the first invocation and the seed is zero - use random seed
if self._state == 0:
self._state = random.randint(0, pow(2, 32) - 1)
LOGGER.inform("input_gen", f"Setting input seed to: {self._state}")

generated_inputs = []
for i in range(count):
input_, seed = self._generate_one(seed)
for _ in range(count):
input_ = self._generate_one()
generated_inputs.append(input_)
return generated_inputs

def extend_equivalence_classes(self, inputs: List[Input],
taints: List[InputTaint]) -> List[Input]:
"""
Produce a new sequence of random inputs, but copy the tainted values from
the base sequence
"""
if len(inputs) != len(taints):
raise Exception("Error: Cannot extend inputs. "
"The number of taints does not match the number of inputs.")
# this function is technically not a generation function,
# hence it should not update the global generation seed
initial_state = self._state

# continue the sequence of random values from the last one
# in the previous input sequence
_, seed = self._generate_one(inputs[-1].seed)

# produce a new sequence of random inputs, but copy the tainted values from
# the previous sequence
# create inputs
new_inputs = []
for i, input_ in enumerate(inputs):
taint = taints[i]
new_input, seed = self._generate_one(seed)
new_input = self._generate_one()
for j in range(input_.data_size):
if taint[j]:
new_input[j] = input_[j]
new_inputs.append(new_input)

self._state = initial_state
return new_inputs

def _generate_one(self, seed: int) -> Tuple[Input, int]:
def load(self, input_paths: List[str]) -> List[Input]:
inputs = []
for input_path in input_paths:
input_ = Input()

# check that the file is not corrupted
size = os.path.getsize(input_path)
if size != len(input_) * 8:
LOGGER.error(f"Incorrect size of input `{input_path}` "
f"({size} B, expected {len(input_) * 8} B)")

input_.load(input_path)
inputs.append(input_)
return inputs


class LegacyRandomInputGenerator(InputGeneratorCommon):
"""
Legacy implementation. Exists only for backwards compatibility.
NumpyRandomInputGenerator is a preferred implementation.
Implements a simple 32-bit LCG with a=2891336453 and c=54321.
"""

def __init__(self, seed: int):
super().__init__(seed)
self.input_mask = pow(2, (CONF.input_gen_entropy_bits % 33)) - 1

def _generate_one(self) -> Input:
input_ = Input()
input_.seed = seed
input_.seed = self._state

randint = seed
randint = self._state
for i in range(input_.data_size):
# this weird implementation is a legacy of our old PRNG.
# basically, it's a 32-bit PRNG, assigned to 4-byte chunks of memory
# TODO: replace it with a more sane implementation after the artifact is done
randint = ((randint * 2891336453) % POW32 + 54321) % POW32
masked_rvalue = (randint ^ (randint >> 16)) & self.input_mask
masked_rvalue = masked_rvalue << 6
Expand All @@ -101,57 +110,25 @@ def _generate_one(self, seed: int) -> Tuple[Input, int]:
for i in range(CONF.input_register_region_size // 8):
input_[-i - 1] = input_[-i - 1] % POW32

return input_, randint
self._state = randint
return input_


class NumpyRandomInputGenerator(InputGeneratorCommon):
""" Numpy-based implementation of the input gen """

def __init__(self):
super().__init__()
def __init__(self, seed: int):
super().__init__(seed)
self.max_input_value = pow(2, CONF.input_gen_entropy_bits)

def generate(self, seed: int, count: int) -> List[Input]:
if seed == 0:
seed = random.randint(0, pow(2, 32) - 1)
LOGGER.inform("input_gen", str(seed))

generated_inputs = []
for _ in range(count):
input_, seed = self._generate_one(seed)
generated_inputs.append(input_)
return generated_inputs

def extend_equivalence_classes(self, inputs: List[Input],
taints: List[InputTaint]) -> List[Input]:
if len(inputs) != len(taints):
raise Exception("Error: Cannot extend inputs. "
"The number of taints does not match the number of inputs.")

# continue the sequence of random values from the last one
# in the previous input sequence
_, seed = self._generate_one(inputs[-1].seed)

# produce a new sequence of random inputs, but copy the tainted values from
# the previous sequence
new_inputs = []
for i, input_ in enumerate(inputs):
taint = taints[i]
new_input, seed = self._generate_one(seed)
for j in range(input_.data_size):
if taint[j]:
new_input[j] = input_[j]
new_inputs.append(new_input)

return new_inputs

def _generate_one(self, seed: int) -> Tuple[Input, int]:
def _generate_one(self) -> Input:
input_ = Input()
input_.seed = seed
input_.seed = self._state

rng = np.random.default_rng(seed)
rng = np.random.default_rng(seed=self._state)
data = rng.integers(self.max_input_value, size=input_.data_size, dtype=np.uint64)
data = data << CONF.memory_access_zeroed_bits # type: ignore
input_[:input_.data_size] = (data << 32) + data

return input_, seed + 1
self._state += 1
return input_
39 changes: 37 additions & 2 deletions src/interfaces.py
Original file line number Diff line number Diff line change
Expand Up @@ -700,11 +700,28 @@ def is_call(inst: Instruction) -> bool:

class Generator(ABC):
instruction_set: InstructionSetAbstract
_state: int = 0

def __init__(self, instruction_set: InstructionSetAbstract):
def __init__(self, instruction_set: InstructionSetAbstract, seed: int):
self.instruction_set = instruction_set
self.set_seed(seed)
super().__init__()

def set_seed(self, seed: int) -> None:
"""
Set the seed value used to generate test programs
:param seed: The seed value
"""
self._state = seed

def get_state(self) -> int:
"""
Get the current state of the generator.
The method complements and is compatible with `set_seed`.
:return: Current state of the generator
"""
return self._state

@abstractmethod
def create_test_case(self, path: str, disable_assembler: bool = False) -> TestCase:
"""
Expand Down Expand Up @@ -732,9 +749,27 @@ def create_pte(self, test_case: TestCase) -> None:


class InputGenerator(ABC):
_state: int = 0

def __init__(self, seed: int):
self.set_seed(seed)
super().__init__()

def set_seed(self, seed: int) -> None:
"""Set the seed value used to generate inputs
:param seed: The seed value
"""
self._state = seed

def get_seed(self) -> int:
"""Get the current state of the generator.
The method complements and is compatible with `set_seed`.
:return: Current state of the generator
"""
return self._state

@abstractmethod
def generate(self, seed: int, count: int) -> List[Input]:
def generate(self, count: int) -> List[Input]:
pass

@abstractmethod
Expand Down
Loading

0 comments on commit b03baaa

Please sign in to comment.