interfaces: [refactor] uniform seeding interface for all generators (…

…both prog and input gens) Co-authored-by: Connor Shugg <[email protected]>
microsoft · Mar 30, 2023 · b03baaa · b03baaa
1 parent 0c5c999
commit b03baaa
Show file tree

Hide file tree

Showing 8 changed files with 132 additions and 115 deletions.
diff --git a/src/factory.py b/src/factory.py
@@ -94,13 +94,14 @@ def get_fuzzer(instruction_set, working_directory, testcase, inputs):
     raise ConfigException("ERROR: unknown value of `fuzzer` configuration option")
 
 
-def get_generator(instruction_set: interfaces.InstructionSetAbstract) -> interfaces.Generator:
+def get_program_generator(instruction_set: interfaces.InstructionSetAbstract,
+                          seed: int) -> interfaces.Generator:
     return _get_from_config(GENERATORS, CONF.instruction_set + "-" + CONF.generator,
-                            "instruction_set", instruction_set)
+                            "instruction_set", instruction_set, seed)
 
 
-def get_input_generator() -> interfaces.InputGenerator:
-    return _get_from_config(INPUT_GENERATORS, CONF.input_generator, "input_generator")
+def get_input_generator(seed: int) -> interfaces.InputGenerator:
+    return _get_from_config(INPUT_GENERATORS, CONF.input_generator, "input_generator", seed)
 
 
 def get_model(bases: Tuple[int, int]) -> interfaces.Model:

diff --git a/src/fuzzer.py b/src/fuzzer.py
@@ -52,8 +52,9 @@ def _adjust_config(self, existing_test_case):
 
     def initialize_modules(self):
         """ create all main modules """
-        self.generator = factory.get_generator(self.instruction_set)
-        self.input_gen = factory.get_input_generator()
+        self.generator = factory.get_program_generator(self.instruction_set,
+                                                       CONF.program_generator_seed)
+        self.input_gen = factory.get_input_generator(CONF.input_gen_seed)
         self.executor = factory.get_executor()
         self.model = factory.get_model(self.executor.read_base_addresses())
         self.analyser = factory.get_analyser()
@@ -98,7 +99,7 @@ def start(self,
             if self.input_paths:
                 inputs = self.input_gen.load(self.input_paths)
             else:
-                inputs = self.input_gen.generate(CONF.input_gen_seed, num_inputs)
+                inputs = self.input_gen.generate(num_inputs)
             STAT.num_inputs += len(inputs) * CONF.inputs_per_class
 
             # Check if the test case is useful
@@ -222,8 +223,9 @@ def generate_test_batch(self, program_generator_seed: int, num_test_cases: int,
         # prepare for generation
         STAT.test_cases = num_test_cases
         CONF.program_generator_seed = program_generator_seed
-        program_gen = factory.get_generator(self.instruction_set)
-        input_gen = factory.get_input_generator()
+        program_gen = factory.get_program_generator(self.instruction_set,
+                                                    CONF.program_generator_seed)
+        input_gen = factory.get_input_generator(CONF.input_gen_seed)
 
         # generate test cases
         Path(self.work_dir).mkdir(exist_ok=True)
@@ -236,7 +238,7 @@ def generate_test_batch(self, program_generator_seed: int, num_test_cases: int,
                              "       Use --permit-overwrite to overwrite the test case")
 
             program_gen.create_test_case(test_case_dir + "/" + "program.asm", True)
-            inputs = input_gen.generate(CONF.input_gen_seed, num_inputs)
+            inputs = input_gen.generate(num_inputs)
             for j, input_ in enumerate(inputs):
                 input_.save(f"{test_case_dir}/input{j}.bin")
 
@@ -262,7 +264,7 @@ def analyse_traces_from_files(ctrace_file: str, htrace_file: str):
         assert len(ctraces) == len(htraces), \
             "The number of hardware traces does not match the number of contract traces"
 
-        dummy_inputs = factory.get_input_generator().generate(1, len(ctraces))
+        dummy_inputs = factory.get_input_generator(0).generate(len(ctraces))
 
         # check for violations
         analyser = factory.get_analyser()

diff --git a/src/generator.py b/src/generator.py
@@ -67,8 +67,8 @@ class ConfigurableGenerator(Generator, abc.ABC):
     printer: Printer  # set by subclasses
     target_desc: TargetDesc  # set by subclasses
 
-    def __init__(self, instruction_set: InstructionSet):
-        super().__init__(instruction_set)
+    def __init__(self, instruction_set: InstructionSet, seed: int):
+        super().__init__(instruction_set, seed)
         LOGGER.dbg_gen_instructions(instruction_set.instructions)
         self.control_flow_instructions = \
             [i for i in self.instruction_set.instructions if i.control_flow]
@@ -91,8 +91,9 @@ def __init__(self, instruction_set: InstructionSet):
             assert self.load_instruction or self.store_instructions, \
                 "The instruction set does not have memory accesses while `avg_mem_accesses > 0`"
 
-        if CONF.program_generator_seed:
-            random.seed(CONF.program_generator_seed)
+    def set_seed(self, seed: int) -> None:
+        if seed:
+            random.seed(seed)
 
     def create_test_case(self, asm_file: str, disable_assembler: bool = False) -> TestCase:
         self.test_case = TestCase()
@@ -379,8 +380,8 @@ class RandomGenerator(ConfigurableGenerator, abc.ABC):
     """
     had_recent_memory_access: bool = False
 
-    def __init__(self, instruction_set: InstructionSet):
-        super().__init__(instruction_set)
+    def __init__(self, instruction_set: InstructionSet, seed: int):
+        super().__init__(instruction_set, seed)
         uncond_name = self.get_unconditional_jump_instruction().name.lower()
         self.cond_branches = \
             [i for i in self.control_flow_instructions if i.name.lower() != uncond_name]
@@ -554,6 +555,7 @@ def generate_cond_operand(self, spec: OperandSpec, _: Instruction) -> Operand:
         return CondOperand(cond)
 
     def add_terminators_in_function(self, func: Function):
+
         def add_fallthrough(bb: BasicBlock, destination: BasicBlock):
             # create an unconditional branch and add it
             terminator = self.get_unconditional_jump_instruction()

diff --git a/src/input_generator.py b/src/input_generator.py
@@ -7,7 +7,8 @@
 import os
 import random
 import numpy as np
-from typing import List, Tuple
+from abc import abstractmethod
+from typing import List
 from interfaces import Input, InputTaint, InputGenerator
 from config import CONF
 from service import LOGGER
@@ -17,75 +18,83 @@
 
 class InputGeneratorCommon(InputGenerator):
 
-    def load(self, input_paths: List[str]) -> List[Input]:
-        inputs = []
-        for input_path in input_paths:
-            input_ = Input()
-
-            # check that the file is not corrupted
-            size = os.path.getsize(input_path)
-            if size != len(input_) * 8:
-                LOGGER.error(f"Incorrect size of input `{input_path}` "
-                             f"({size} B, expected {len(input_) * 8} B)")
-
-            input_.load(input_path)
-            inputs.append(input_)
-        return inputs
-
-
-class LegacyRandomInputGenerator(InputGeneratorCommon):
-    """
-    Legacy implementation. Will be deprecated in the future because of low performance.
-    Simple 32-bit LCG with a=2891336453 and c=54321.
-    """
+    @abstractmethod
+    def _generate_one(self) -> Input:
+        pass
 
-    def __init__(self):
-        super().__init__()
-        self.input_mask = pow(2, (CONF.input_gen_entropy_bits % 33)) - 1
-
-    def generate(self, seed: int, count: int) -> List[Input]:
-        if seed == 0:
-            seed = random.randint(0, pow(2, 32) - 1)
-            LOGGER.inform("input_gen", str(seed))
+    def generate(self, count: int) -> List[Input]:
+        # if it's the first invocation and the seed is zero - use random seed
+        if self._state == 0:
+            self._state = random.randint(0, pow(2, 32) - 1)
+            LOGGER.inform("input_gen", f"Setting input seed to: {self._state}")
 
         generated_inputs = []
-        for i in range(count):
-            input_, seed = self._generate_one(seed)
+        for _ in range(count):
+            input_ = self._generate_one()
             generated_inputs.append(input_)
         return generated_inputs
 
     def extend_equivalence_classes(self, inputs: List[Input],
                                    taints: List[InputTaint]) -> List[Input]:
+        """
+        Produce a new sequence of random inputs, but copy the tainted values from
+        the base sequence
+        """
         if len(inputs) != len(taints):
             raise Exception("Error: Cannot extend inputs. "
                             "The number of taints does not match the number of inputs.")
+        # this function is technically not a generation function,
+        # hence it should not update the global generation seed
+        initial_state = self._state
 
-        # continue the sequence of random values from the last one
-        # in the previous input sequence
-        _, seed = self._generate_one(inputs[-1].seed)
-
-        # produce a new sequence of random inputs, but copy the tainted values from
-        # the previous sequence
+        # create inputs
         new_inputs = []
         for i, input_ in enumerate(inputs):
             taint = taints[i]
-            new_input, seed = self._generate_one(seed)
+            new_input = self._generate_one()
             for j in range(input_.data_size):
                 if taint[j]:
                     new_input[j] = input_[j]
             new_inputs.append(new_input)
 
+        self._state = initial_state
         return new_inputs
 
-    def _generate_one(self, seed: int) -> Tuple[Input, int]:
+    def load(self, input_paths: List[str]) -> List[Input]:
+        inputs = []
+        for input_path in input_paths:
+            input_ = Input()
+
+            # check that the file is not corrupted
+            size = os.path.getsize(input_path)
+            if size != len(input_) * 8:
+                LOGGER.error(f"Incorrect size of input `{input_path}` "
+                             f"({size} B, expected {len(input_) * 8} B)")
+
+            input_.load(input_path)
+            inputs.append(input_)
+        return inputs
+
+
+class LegacyRandomInputGenerator(InputGeneratorCommon):
+    """
+    Legacy implementation. Exists only for backwards compatibility.
+    NumpyRandomInputGenerator is the preferred implementation.
+    Implements a simple 32-bit LCG with a=2891336453 and c=54321.
+    """
+
+    def __init__(self, seed: int):
+        super().__init__(seed)
+        self.input_mask = pow(2, (CONF.input_gen_entropy_bits % 33)) - 1
+
+    def _generate_one(self) -> Input:
         input_ = Input()
-        input_.seed = seed
+        input_.seed = self._state
 
-        randint = seed
+        randint = self._state
         for i in range(input_.data_size):
             # this weird implementation is a legacy of our old PRNG.
             # basically, it's a 32-bit PRNG, assigned to 4-byte chunks of memory
-            # TODO: replace it with a more sane implementation after the artifact is done
             randint = ((randint * 2891336453) % POW32 + 54321) % POW32
             masked_rvalue = (randint ^ (randint >> 16)) & self.input_mask
             masked_rvalue = masked_rvalue << 6
@@ -101,57 +110,25 @@ def _generate_one(self, seed: int) -> Tuple[Input, int]:
         for i in range(CONF.input_register_region_size // 8):
             input_[-i - 1] = input_[-i - 1] % POW32
 
-        return input_, randint
+        self._state = randint
+        return input_
 
 
 class NumpyRandomInputGenerator(InputGeneratorCommon):
     """ Numpy-based implementation of the input gen """
 
-    def __init__(self):
-        super().__init__()
+    def __init__(self, seed: int):
+        super().__init__(seed)
         self.max_input_value = pow(2, CONF.input_gen_entropy_bits)
 
-    def generate(self, seed: int, count: int) -> List[Input]:
-        if seed == 0:
-            seed = random.randint(0, pow(2, 32) - 1)
-            LOGGER.inform("input_gen", str(seed))
-
-        generated_inputs = []
-        for _ in range(count):
-            input_, seed = self._generate_one(seed)
-            generated_inputs.append(input_)
-        return generated_inputs
-
-    def extend_equivalence_classes(self, inputs: List[Input],
-                                   taints: List[InputTaint]) -> List[Input]:
-        if len(inputs) != len(taints):
-            raise Exception("Error: Cannot extend inputs. "
-                            "The number of taints does not match the number of inputs.")
-
-        # continue the sequence of random values from the last one
-        # in the previous input sequence
-        _, seed = self._generate_one(inputs[-1].seed)
-
-        # produce a new sequence of random inputs, but copy the tainted values from
-        # the previous sequence
-        new_inputs = []
-        for i, input_ in enumerate(inputs):
-            taint = taints[i]
-            new_input, seed = self._generate_one(seed)
-            for j in range(input_.data_size):
-                if taint[j]:
-                    new_input[j] = input_[j]
-            new_inputs.append(new_input)
-
-        return new_inputs
-
-    def _generate_one(self, seed: int) -> Tuple[Input, int]:
+    def _generate_one(self) -> Input:
         input_ = Input()
-        input_.seed = seed
+        input_.seed = self._state
 
-        rng = np.random.default_rng(seed)
+        rng = np.random.default_rng(seed=self._state)
         data = rng.integers(self.max_input_value, size=input_.data_size, dtype=np.uint64)
         data = data << CONF.memory_access_zeroed_bits  # type: ignore
         input_[:input_.data_size] = (data << 32) + data
 
-        return input_, seed + 1
+        self._state += 1
+        return input_
diff --git a/src/interfaces.py b/src/interfaces.py
@@ -700,11 +700,28 @@ def is_call(inst: Instruction) -> bool:
 
 class Generator(ABC):
     instruction_set: InstructionSetAbstract
+    _state: int = 0
 
-    def __init__(self, instruction_set: InstructionSetAbstract):
+    def __init__(self, instruction_set: InstructionSetAbstract, seed: int):
         self.instruction_set = instruction_set
+        self.set_seed(seed)
         super().__init__()
 
+    def set_seed(self, seed: int) -> None:
+        """
+        Set the seed value used to generate test programs
+        :param seed: The seed value
+        """
+        self._state = seed
+
+    def get_state(self) -> int:
+        """
+        Get the current state of the generator.
+        The method complements and is compatible with `set_seed`.
+        :return: Current state of the generator
+        """
+        return self._state
+
     @abstractmethod
     def create_test_case(self, path: str, disable_assembler: bool = False) -> TestCase:
         """
@@ -732,9 +749,27 @@ def create_pte(self, test_case: TestCase) -> None:
 
 
 class InputGenerator(ABC):
+    _state: int = 0
+
+    def __init__(self, seed: int):
+        self.set_seed(seed)
+        super().__init__()
+
+    def set_seed(self, seed: int) -> None:
+        """Set the seed value used to generate inputs
+        :param seed: The seed value
+        """
+        self._state = seed
+
+    def get_seed(self) -> int:
+        """Get the current state of the generator.
+        The method complements and is compatible with `set_seed`.
+        :return: Current state of the generator
+        """
+        return self._state
 
     @abstractmethod
-    def generate(self, seed: int, count: int) -> List[Input]:
+    def generate(self, count: int) -> List[Input]:
         pass
 
     @abstractmethod