Skip to content

Commit

Permalink
Basic block analyser (#981)
Browse files Browse the repository at this point in the history
* Add directory `Analysers`

* entry point analyser

* basic block analyser (tested but unused)

* fix tests

* Apply suggestions from code review

* Update tests/Neo.Compiler.CSharp.UnitTests/UnitTest_Optimizer/UnitTest_BasicBlock.cs

* Update src/Neo.Compiler.CSharp/Optimizer/Analysers/InstructionCoverage.cs

* fix ut test

* Remove generatedRegex

* string.IsNullOrEmpty

---------

Co-authored-by: Shargon <[email protected]>
Co-authored-by: Jimmy <[email protected]>
Co-authored-by: Jim8y <[email protected]>
  • Loading branch information
4 people authored Mar 5, 2024
1 parent c68b73e commit 3c16740
Show file tree
Hide file tree
Showing 11 changed files with 429 additions and 296 deletions.
14 changes: 14 additions & 0 deletions src/Neo.Compiler.CSharp/Optimizer/Analysers/BasicBlock.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
using Neo.Json;
using Neo.SmartContract;
using Neo.SmartContract.Manifest;
using Neo.VM;
using System.Collections.Generic;

namespace Neo.Optimizer
{
/// <summary>
/// Convenience entry point for extracting the basic blocks of a compiled contract.
/// </summary>
static class BasicBlock
{
    /// <summary>
    /// Runs an <see cref="InstructionCoverage"/> analysis over the contract and returns the basic blocks it found.
    /// </summary>
    /// <param name="nef">Compiled contract whose script is analysed.</param>
    /// <param name="manifest">Contract manifest, forwarded to the coverage analysis.</param>
    /// <param name="debugInfo">Debug information, forwarded to the coverage analysis.</param>
    /// <returns>
    /// A map from the starting address of each basic block to the instructions of that block,
    /// themselves keyed by instruction address.
    /// </returns>
    public static Dictionary<int, Dictionary<int, Instruction>> FindBasicBlocks(NefFile nef, ContractManifest manifest, JToken debugInfo)
    {
        InstructionCoverage coverage = new(nef, manifest, debugInfo);
        return coverage.basicBlocks;
    }
}
}
269 changes: 269 additions & 0 deletions src/Neo.Compiler.CSharp/Optimizer/Analysers/InstructionCoverage.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,269 @@
using Neo.Json;
using Neo.SmartContract;
using Neo.SmartContract.Manifest;
using Neo.VM;
using System;
using System.Collections.Generic;
using System.Linq;
using static Neo.Optimizer.JumpTarget;
using static Neo.Optimizer.OpCodeTypes;

namespace Neo.Optimizer
{
/// <summary>
/// Kind of frame stored on the try stack that the coverage analysis maintains while walking a script.
/// </summary>
public enum TryStackType
{
ENTRY, // Bottom frame, pushed when coverage starts without an existing stack
TRY, // Pushed on TRY / TRY_L, together with the computed catch and finally targets
CATCH, // Pushed when a throw is redirected to the catch address of a try
FINALLY, // Pushed when coverage continues at a finally address
}

/// <summary>
/// Outcome recorded for the code reachable from an address.
/// </summary>
public enum BranchType
{
OK, // At least one of the branches may return without exception
THROW, // All branches surely throw an exception, but the exception can be caught
ABORT, // All branches abort, and the abort cannot be caught
UNCOVERED, // Not visited yet; every address starts with this value in the covered map
}

public class InstructionCoverage
{
// Script of the contract under analysis
Script script;

/// <summary>
/// For each instruction address: whether the code starting there surely throws, surely aborts, or may be OK.
/// Addresses that have not been visited hold <see cref="BranchType.UNCOVERED"/>.
/// </summary>
public Dictionary<int, BranchType> coveredMap { get; protected set; }

/// <summary>
/// Basic blocks keyed by their starting address; each block maps instruction addresses to instructions.
/// </summary>
public Dictionary<int, Dictionary<int, Instruction>> basicBlocks { get; protected set; }

/// <summary>
/// Every instruction of the script paired with its address, as enumerated from the script.
/// </summary>
public List<(int a, Instruction i)> addressAndInstructions { get; init; }

/// <summary>
/// For each jump or try target address, the addresses of the instructions that refer to it.
/// </summary>
public Dictionary<int, HashSet<int>> jumpTargetToSources { get; init; }

/// <summary>
/// Analyses the script of <paramref name="nef"/>, starting from every entry point
/// reported for the manifest and the debug information.
/// </summary>
/// <param name="nef">Compiled contract providing the script.</param>
/// <param name="manifest">Contract manifest used to look up entry points.</param>
/// <param name="debugInfo">Debug information used to look up entry points.</param>
public InstructionCoverage(NefFile nef, ContractManifest manifest, JToken debugInfo)
{
    script = nef.Script;
    basicBlocks = new();
    addressAndInstructions = script.EnumerateInstructions().ToList();

    // Only the target-to-sources map is needed from the jump analysis
    var jumpAnalysis = FindAllJumpAndTrySourceToTargets(addressAndInstructions);
    jumpTargetToSources = jumpAnalysis.Item3;

    // Every known address starts as UNCOVERED
    coveredMap = addressAndInstructions.ToDictionary(pair => pair.a, _ => BranchType.UNCOVERED);

    // Entry points are covered one after another on purpose: coveredMap holds a
    // multi-valued BranchType rather than a true/false flag, so it is unsafe to go parallel.
    foreach ((int entryAddr, _) in EntryPoint.EntryPointsByMethod(manifest, debugInfo))
    {
        CoverInstruction(entryAddr);
    }
}

/// <summary>
/// Creates an independent copy of a try stack, keeping the frames in the same order.
/// </summary>
/// <param name="stack">Stack to copy; it is not modified.</param>
/// <returns>A new stack whose top frame equals the top frame of <paramref name="stack"/>.</returns>
public static Stack<((int returnAddr, int finallyAddr), TryStackType stackType)> CopyStack
    (Stack<((int returnAddr, int finallyAddr), TryStackType stackType)> stack)
{
    // ToArray yields the frames top-first, so push them back starting from the bottom
    ((int returnAddr, int finallyAddr), TryStackType stackType)[] frames = stack.ToArray();
    Stack<((int returnAddr, int finallyAddr), TryStackType stackType)> copy = new();
    for (int index = frames.Length - 1; index >= 0; index--)
    {
        copy.Push(frames[index]);
    }
    return copy;
}

/// <summary>
/// Follows an exception raised at <paramref name="addr"/>: unwinds the try stack to the nearest
/// TRY or CATCH frame and continues coverage at the matching catch or finally address.
/// </summary>
/// <param name="entranceAddr">Starting address of the basic block being covered; its entry in the covered map receives the result in most paths.</param>
/// <param name="addr">Address of the instruction where the exception is raised.</param>
/// <param name="stack">Current try stack; the method works on a copy and leaves the argument untouched.</param>
/// <returns>The branch type obtained after following the exception.</returns>
public BranchType HandleThrow(int entranceAddr, int addr, Stack<((int catchAddr, int finallyAddr), TryStackType stackType)> stack)
{
    stack = CopyStack(stack);

    // Pop frames until a TRY or CATCH frame is found, or the stack runs out
    ((int catchTarget, int finallyTarget), TryStackType frameType) = stack.Pop();
    while (frameType != TryStackType.TRY && frameType != TryStackType.CATCH && stack.Count > 0)
    {
        ((catchTarget, finallyTarget), frameType) = stack.Pop();
    }

    switch (frameType)
    {
        // Try with catch: the throw is cancelled and the catch block runs
        case TryStackType.TRY when catchTarget != -1:
            stack.Push(((-1, finallyTarget), TryStackType.CATCH));
            return coveredMap[entranceAddr] = CoverInstruction(catchTarget, stack: stack, throwed: false);

        // Try without catch: the finally block runs, but the exception keeps propagating
        case TryStackType.TRY when finallyTarget != -1:
            coveredMap[addr] = BranchType.THROW;
            stack.Push(((-1, -1), TryStackType.FINALLY));
            return coveredMap[entranceAddr] = CoverInstruction(finallyTarget, stack: stack, throwed: true);

        // Thrown inside a catch: run the finally block if there is one, and keep throwing
        case TryStackType.CATCH:
            if (finallyTarget != -1)
            {
                addr = finallyTarget;
                stack.Push(((-1, -1), TryStackType.FINALLY));
            }
            return CoverInstruction(addr, stack: stack, throwed: true);

        // Neither in a try nor in a catch: nothing can handle the exception
        default:
            coveredMap[entranceAddr] = BranchType.THROW;
            return BranchType.THROW;
    }
}

/// <summary>
/// Records the outcome of an abort at <paramref name="addr"/>. When the top frame of the try stack is a
/// try with a catch, or a catch with a finally, the throw path is visited as well,
/// because there may still be exceptions at runtime.
/// </summary>
/// <param name="entranceAddr">Starting address of the basic block being covered; its entry in the covered map receives the result.</param>
/// <param name="addr">Address of the aborting instruction.</param>
/// <param name="stack">Current try stack; only its top frame is inspected here.</param>
/// <returns>OK when the visited throw path may return without exception; otherwise ABORT.</returns>
public BranchType HandleAbort(int entranceAddr, int addr, Stack<((int returnAddr, int finallyAddr), TryStackType stackType)> stack)
{
    ((int catchTarget, int finallyTarget), TryStackType frameType) = stack.Peek();

    // Is there a handler that a runtime exception could reach from here?
    bool handlerReachable = frameType switch
    {
        TryStackType.TRY => catchTarget != -1,
        TryStackType.CATCH => finallyTarget != -1,
        _ => false,
    };

    if (handlerReachable && HandleThrow(entranceAddr, addr, stack) == BranchType.OK)
    {
        return coveredMap[entranceAddr] = BranchType.OK;
    }
    return coveredMap[entranceAddr] = BranchType.ABORT;
}

/// <summary>
/// Covers the basic block that starts at <paramref name="addr"/>, and recursively covers all branches
/// reachable from it. Every visited instruction other than NOP is marked in <see cref="coveredMap"/> and
/// recorded in <see cref="basicBlocks"/> under the address where this call started.
/// </summary>
/// <param name="addr">Address of the first instruction to cover; it is also the key of the basic block built by this call.</param>
/// <param name="stack">Try stack in effect at <paramref name="addr"/>. When null, a new stack holding a single ENTRY frame is created; otherwise the method works on a copy.</param>
/// <param name="throwed">True when an exception is still propagating while this code is covered. If it is true and the top frame is not FINALLY, the entrance is marked THROW and the call returns at once.</param>
/// <returns>The branch type of the covered code: OK when it may return without exception, otherwise THROW or ABORT.</returns>
/// <exception cref="BadScriptException">The address is not a known instruction address, or ENDTRY / ENDFINALLY is met without the matching frame on the try stack.</exception>
/// <exception cref="NotImplementedException">Not thrown directly by this method; presumably reserved for unsupported cases such as the ABORTMSG TODO below — TODO confirm.</exception>
/// <exception cref="Exception">The two branches of a conditional jump returned a combination of branch types that is not handled.</exception>
public BranchType CoverInstruction(int addr,
Stack<((int returnAddr, int finallyAddr), TryStackType stackType)>? stack = null,
bool throwed = false)
{
// Remember where this basic block starts; addr advances while walking
int entranceAddr = addr;
if (stack == null)
{
stack = new();
stack.Push(((-1, -1), TryStackType.ENTRY));
}
else
// Copy so that changes made here do not leak into the caller's stack
stack = CopyStack(stack);
if (throwed)
{
// A propagating exception only lets code run when we are inside a finally block
((int catchAddr, int finallyAddr), TryStackType stackType) = stack.Peek();
if (stackType != TryStackType.FINALLY)
{
coveredMap[entranceAddr] = BranchType.THROW;
return BranchType.THROW;
}
}
while (true)
{
// For the analysis of basic blocks,
// a new recursion is launched whenever an exception is caught.
// Here the exception has not been caught.
if (!coveredMap.ContainsKey(addr))
throw new BadScriptException($"wrong address {addr}");
if (coveredMap[addr] != BranchType.UNCOVERED)
// We have already visited this code. Skip it.
return coveredMap[addr];
if (jumpTargetToSources.ContainsKey(addr) && addr != entranceAddr)
// On the target of a jump, start a new recursion to split basic blocks
return CoverInstruction(addr, stack, throwed);
Instruction instruction = script.GetInstruction(addr);
if (instruction.OpCode != OpCode.NOP)
{
coveredMap[addr] = BranchType.OK;
// Create the basic block starting from entranceAddr if it does not exist yet
if (!basicBlocks.TryGetValue(entranceAddr, out Dictionary<int, Instruction>? instructions))
{
instructions = new Dictionary<int, Instruction>();
basicBlocks.Add(entranceAddr, instructions);
}
// Add this instruction to the basic block starting from entranceAddr
instructions.Add(addr, instruction);
}

// TODO: ABORTMSG may THROW instead of ABORT. Just throw new NotImplementedException for ABORTMSG?
if (instruction.OpCode == OpCode.ABORT || instruction.OpCode == OpCode.ABORTMSG)
return HandleAbort(entranceAddr, addr, stack);
if (callWithJump.Contains(instruction.OpCode))
{
// The call target is covered with a fresh stack (no stack argument is passed)
int callTarget = ComputeJumpTarget(addr, instruction);
BranchType returnedType = CoverInstruction(callTarget);
if (returnedType == BranchType.OK)
return CoverInstruction(addr + instruction.Size, stack);
if (returnedType == BranchType.ABORT)
return HandleAbort(entranceAddr, addr, stack);
if (returnedType == BranchType.THROW)
return HandleThrow(entranceAddr, addr, stack);
}
if (instruction.OpCode == OpCode.RET)
{
// See if we are in a try. There may still be runtime exceptions
HandleThrow(entranceAddr, addr, stack);
coveredMap[entranceAddr] = BranchType.OK;
return coveredMap[entranceAddr];
}
if (tryThrowFinally.Contains(instruction.OpCode))
{
if (instruction.OpCode == OpCode.TRY || instruction.OpCode == OpCode.TRY_L)
{
// Enter the try: push its catch/finally targets and continue with the next instruction
stack.Push((ComputeTryTarget(addr, instruction), TryStackType.TRY));
return CoverInstruction(addr + instruction.Size, stack);
}
if (instruction.OpCode == OpCode.THROW)
return HandleThrow(entranceAddr, addr, stack);
if (instruction.OpCode == OpCode.ENDTRY || instruction.OpCode == OpCode.ENDTRY_L)
{
((int catchAddr, int finallyAddr), TryStackType stackType) = stack.Peek();
if (stackType != TryStackType.TRY && stackType != TryStackType.CATCH)
throw new BadScriptException("No try stack on ENDTRY");

// Visit catchAddr and finallyAddr because there may still be exceptions at runtime
HandleThrow(entranceAddr, addr, stack);
coveredMap[entranceAddr] = BranchType.OK;

// Leave the try/catch frame; go through the finally block first when there is one
stack.Pop();
int endPointer = ComputeJumpTarget(addr, instruction);
if (finallyAddr != -1)
{
stack.Push(((-1, endPointer), TryStackType.FINALLY));
addr = finallyAddr;
}
else
addr = endPointer;
return CoverInstruction(addr, stack, throwed);
}
if (instruction.OpCode == OpCode.ENDFINALLY)
{
((int catchAddr, int finallyAddr), TryStackType stackType) = stack.Pop();
if (stackType != TryStackType.FINALLY)
throw new BadScriptException("No finally stack on ENDFINALLY");
if (throwed)
{
// For this basic block in finally, the branch type is OK
coveredMap[entranceAddr] = BranchType.OK;
// The throw was caused by earlier code
return BranchType.THROW;
}
return CoverInstruction(addr + instruction.Size, stack, false);
}
}
if (unconditionalJump.Contains(instruction.OpCode))
// Alternative kept for reference: follow the jump in this loop instead of recursing
//addr = ComputeJumpTarget(addr, instruction);
//continue;
// For the analysis of basic blocks, we launch a new recursion
return CoverInstruction(ComputeJumpTarget(addr, instruction), stack, throwed);
if (conditionalJump.Contains(instruction.OpCode) || conditionalJump_L.Contains(instruction.OpCode))
{
// Cover both outcomes of the condition: fall through, then jump
BranchType noJump = CoverInstruction(addr + instruction.Size, stack);
BranchType jump = CoverInstruction(ComputeJumpTarget(addr, instruction), stack);
if (noJump == BranchType.OK || jump == BranchType.OK)
{
// See if we are in a try. There may still be runtime exceptions
HandleThrow(entranceAddr, addr, stack);
coveredMap[entranceAddr] = BranchType.OK;
return coveredMap[entranceAddr];
}
if (noJump == BranchType.ABORT && jump == BranchType.ABORT)
return HandleAbort(entranceAddr, addr, stack);
if (noJump == BranchType.THROW || jump == BranchType.THROW) // THROW combined with ABORT counts as THROW
return HandleThrow(entranceAddr, addr, stack);
throw new Exception($"Unknown {nameof(BranchType)} {noJump} {jump}");
}

// Any other instruction: move on to the next one in the same basic block
addr += instruction.Size;
}
}
}
}
51 changes: 29 additions & 22 deletions src/Neo.Compiler.CSharp/Optimizer/Analysers/JumpTarget.cs
Original file line number Diff line number Diff line change
Expand Up @@ -56,20 +56,20 @@ public static (int catchTarget, int finallyTarget) ComputeTryTarget(int addr, In

public static (Dictionary<Instruction, Instruction>,
Dictionary<Instruction, (Instruction, Instruction)>,
Dictionary<Instruction, HashSet<Instruction>>)
Dictionary<int, HashSet<int>>)
FindAllJumpAndTrySourceToTargets(NefFile nef)
{
Script script = nef.Script;
return FindAllJumpAndTrySourceToTargets(script);
}
public static (Dictionary<Instruction, Instruction>,
Dictionary<Instruction, (Instruction, Instruction)>,
Dictionary<Instruction, HashSet<Instruction>>)
Dictionary<int, HashSet<int>>)
FindAllJumpAndTrySourceToTargets(Script script) => FindAllJumpAndTrySourceToTargets(script.EnumerateInstructions().ToList());
public static (
Dictionary<Instruction, Instruction>, // jump source to target
Dictionary<Instruction, (Instruction, Instruction)>, // try source to targets
Dictionary<Instruction, HashSet<Instruction>> // target to source
Dictionary<int, HashSet<int>> // target to source
)
FindAllJumpAndTrySourceToTargets(List<(int, Instruction)> addressAndInstructionsList)
{
Expand All @@ -78,33 +78,40 @@ public static (
addressToInstruction.Add(a, i);
Dictionary<Instruction, Instruction> jumpSourceToTargets = new();
Dictionary<Instruction, (Instruction, Instruction)> trySourceToTargets = new();
Dictionary<Instruction, HashSet<Instruction>> targetToSources = new();
Dictionary<int, HashSet<int>> targetToSources = new();
foreach ((int a, Instruction i) in addressAndInstructionsList)
{
if (SingleJumpInOperand(i))
{
Instruction target = addressToInstruction[ComputeJumpTarget(a, i)];
int targetAddr = ComputeJumpTarget(a, i);
Instruction target = addressToInstruction[targetAddr];
jumpSourceToTargets.TryAdd(i, target);
if (!targetToSources.TryGetValue(target, out HashSet<Instruction>? sources)) sources = new();
sources.Add(i);
if (!targetToSources.TryGetValue(targetAddr, out HashSet<int>? sources))
{
sources = new();
targetToSources.Add(targetAddr, sources);
}
sources.Add(a);
}
if (i.OpCode == TRY)
if (i.OpCode == TRY || i.OpCode == TRY_L)
{
(Instruction t1, Instruction t2) = (addressToInstruction[a + i.TokenI8], addressToInstruction[a + i.TokenI8_1]);
(int a1, int a2) = i.OpCode == TRY ?
(a + i.TokenI8, a + i.TokenI8_1) :
(a + i.TokenI32, a + i.TokenI32_1);
(Instruction t1, Instruction t2) = (addressToInstruction[a1], addressToInstruction[a2]);
trySourceToTargets.TryAdd(i, (t1, t2));
if (!targetToSources.TryGetValue(t1, out HashSet<Instruction>? sources1)) sources1 = new();
sources1.Add(i);
if (!targetToSources.TryGetValue(t2, out HashSet<Instruction>? sources2)) sources2 = new();
sources2.Add(i);
}
if (i.OpCode == TRY_L)
{
(Instruction t1, Instruction t2) = (addressToInstruction[a + i.TokenI32], addressToInstruction[a + i.TokenI32_1]);
trySourceToTargets.TryAdd(i, (t1, t2));
if (!targetToSources.TryGetValue(t1, out HashSet<Instruction>? sources1)) sources1 = new();
sources1.Add(i);
if (!targetToSources.TryGetValue(t2, out HashSet<Instruction>? sources2)) sources2 = new();
sources2.Add(i);
// Register this TRY instruction (at address a) as a source of its first target a1.
if (!targetToSources.TryGetValue(a1, out HashSet<int>? sources1))
{
    sources1 = new();
    targetToSources.Add(a1, sources1);
}
sources1.Add(a);
// Register it as a source of its second target a2 as well.
// The lookup key must be a2: looking up a1 here always succeeds (it was registered just above),
// so the set for a1 would be reused and a2 would never be added to targetToSources.
if (!targetToSources.TryGetValue(a2, out HashSet<int>? sources2))
{
    sources2 = new();
    targetToSources.Add(a2, sources2);
}
sources2.Add(a);
}
}
return (jumpSourceToTargets, trySourceToTargets, targetToSources);
Expand Down
Loading

0 comments on commit 3c16740

Please sign in to comment.