Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Basic block analyser #981

Merged
merged 11 commits into from
Mar 5, 2024
14 changes: 14 additions & 0 deletions src/Neo.Compiler.CSharp/Optimizer/Analysers/BasicBlock.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
using Neo.Json;
using Neo.SmartContract;
using Neo.SmartContract.Manifest;
using Neo.VM;
using System.Collections.Generic;

namespace Neo.Optimizer
{
static class BasicBlock
{
    /// <summary>
    /// Runs an <see cref="InstructionCoverage"/> analysis and returns the basic blocks it found.
    /// </summary>
    /// <param name="nef">Compiled contract, forwarded to the coverage analysis.</param>
    /// <param name="manifest">Contract manifest, forwarded to the coverage analysis.</param>
    /// <param name="debugInfo">Debug information, forwarded to the coverage analysis.</param>
    /// <returns>
    /// The <c>basicBlocks</c> map of the analysis: entrance address of each block,
    /// mapped to the instructions of that block keyed by their address.
    /// </returns>
    public static Dictionary<int, Dictionary<int, Instruction>> FindBasicBlocks(NefFile nef, ContractManifest manifest, JToken debugInfo)
    {
        InstructionCoverage coverage = new(nef, manifest, debugInfo);
        return coverage.basicBlocks;
    }
}
}
269 changes: 269 additions & 0 deletions src/Neo.Compiler.CSharp/Optimizer/Analysers/InstructionCoverage.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,269 @@
using Neo.Json;
using Neo.SmartContract;
using Neo.SmartContract.Manifest;
using Neo.VM;
using System;
using System.Collections.Generic;
using System.Linq;
using static Neo.Optimizer.JumpTarget;
using static Neo.Optimizer.OpCodeTypes;

namespace Neo.Optimizer
{
/// <summary>
/// Kind of frame kept on the try stack while instructions are being covered.
/// </summary>
public enum TryStackType
{
    /// <summary>Bottom frame, pushed when coverage starts without an existing stack.</summary>
    ENTRY = 0,

    /// <summary>Frame pushed when a TRY / TRY_L instruction is visited.</summary>
    TRY = 1,

    /// <summary>Frame pushed when a throw is redirected to a catch address.</summary>
    CATCH = 2,

    /// <summary>Frame pushed when execution is redirected to a finally address.</summary>
    FINALLY = 3,
}

/// <summary>
/// Outcome of executing the code reachable from an address.
/// </summary>
public enum BranchType
{
    /// <summary>At least one of the branches may return without an exception.</summary>
    OK,

    /// <summary>All branches surely throw an exception, but it can be caught.</summary>
    THROW,

    /// <summary>All branches abort, and the abort cannot be caught.</summary>
    ABORT,

    /// <summary>Initial state of every address: not marked as covered.</summary>
    UNCOVERED,
}

public class InstructionCoverage
{
// Script being analysed; the constructor takes it from the NEF file.
Script script;
// Per instruction address: starting from the address, whether the call will surely throw,
// surely abort, or may be OK. The constructor initialises every address to UNCOVERED.
public Dictionary<int, BranchType> coveredMap { get; protected set; }
// Basic blocks found by the analysis:
// entrance address of the block -> (instruction address -> instruction).
public Dictionary<int, Dictionary<int, Instruction>> basicBlocks { get; protected set; }
// Every (address, instruction) pair enumerated from the script.
public List<(int a, Instruction i)> addressAndInstructions { get; init; }
// Target address -> addresses of the instructions referring to it, as returned by
// FindAllJumpAndTrySourceToTargets. CoverInstruction starts a new basic block at each key.
public Dictionary<int, HashSet<int>> jumpTargetToSources { get; init; }
/// <summary>
/// Builds the coverage map and the basic blocks of a contract by covering
/// every entry point reported by <c>EntryPoint.EntryPointsByMethod</c>.
/// </summary>
/// <param name="nef">Compiled contract whose script is analysed.</param>
/// <param name="manifest">Contract manifest, used to find the entry points.</param>
/// <param name="debugInfo">Debug information, used to find the entry points.</param>
public InstructionCoverage(NefFile nef, ContractManifest manifest, JToken debugInfo)
{
    script = nef.Script;
    basicBlocks = new();
    coveredMap = new();
    addressAndInstructions = script.EnumerateInstructions().ToList();
    jumpTargetToSources = FindAllJumpAndTrySourceToTargets(addressAndInstructions).Item3;

    // Nothing has been visited yet: every instruction address starts as UNCOVERED.
    foreach ((int a, Instruction i) entry in addressAndInstructions)
    {
        coveredMap.Add(entry.a, BranchType.UNCOVERED);
    }

    // Entry points are covered one after another on purpose: it is unsafe to go parallel,
    // because the coveredMap value is not a plain true/false flag.
    var entryPoints = EntryPoint.EntryPointsByMethod(manifest, debugInfo);
    foreach ((int entryAddr, _) in entryPoints)
    {
        CoverInstruction(entryAddr);
    }
}

/// <summary>
/// Creates an independent copy of a try stack with the frames in the same order.
/// </summary>
/// <param name="stack">Stack to copy; it is only enumerated, not modified.</param>
/// <returns>A new stack holding the same frames, with the same frame on top.</returns>
public static Stack<((int returnAddr, int finallyAddr), TryStackType stackType)> CopyStack
    (Stack<((int returnAddr, int finallyAddr), TryStackType stackType)> stack)
{
    // A Stack enumerates from top to bottom, while the copy constructor pushes items
    // in enumeration order. Reversing first keeps the original top on top.
    IEnumerable<((int returnAddr, int finallyAddr), TryStackType stackType)> bottomToTop = Enumerable.Reverse(stack);
    return new Stack<((int returnAddr, int finallyAddr), TryStackType stackType)>(bottomToTop);
}

/// <summary>
/// Simulates an exception raised at <paramref name="addr"/>: unwinds a copy of the try stack
/// down to the nearest TRY or CATCH frame and covers the catch / finally code that runs next.
/// </summary>
/// <param name="entranceAddr">Entrance address of the basic block being covered; its coveredMap entry receives the result on most paths.</param>
/// <param name="addr">Address of the instruction at which the exception is raised.</param>
/// <param name="stack">Try stack at that instruction. It is copied first, so the caller's stack is not modified.</param>
/// <returns>The branch type once the exception has been dispatched.</returns>
public BranchType HandleThrow(int entranceAddr, int addr, Stack<((int catchAddr, int finallyAddr), TryStackType stackType)> stack)
{
    Stack<((int catchAddr, int finallyAddr), TryStackType stackType)> unwound = CopyStack(stack);

    // Pop frames until a TRY or CATCH frame is found, or the stack runs out.
    int catchAddr;
    int finallyAddr;
    TryStackType frameType;
    while (true)
    {
        ((catchAddr, finallyAddr), frameType) = unwound.Pop();
        bool isHandlerFrame = frameType == TryStackType.TRY || frameType == TryStackType.CATCH;
        if (isHandlerFrame || unwound.Count <= 0)
            break;
    }

    switch (frameType)
    {
        // try with catch: cancel throw and execute catch
        case TryStackType.TRY when catchAddr != -1:
            {
                unwound.Push(((-1, finallyAddr), TryStackType.CATCH));
                BranchType viaCatch = CoverInstruction(catchAddr, stack: unwound, throwed: false);
                coveredMap[entranceAddr] = viaCatch;
                return viaCatch;
            }

        // try without catch: execute finally but keep throwing
        case TryStackType.TRY when finallyAddr != -1:
            {
                coveredMap[addr] = BranchType.THROW;
                unwound.Push(((-1, -1), TryStackType.FINALLY));
                BranchType viaFinally = CoverInstruction(finallyAddr, stack: unwound, throwed: true);
                coveredMap[entranceAddr] = viaFinally;
                return viaFinally;
            }

        // thrown inside catch: execute finally (when there is one) but keep throwing
        case TryStackType.CATCH:
            {
                int resumeAddr = addr;
                if (finallyAddr != -1)
                {
                    resumeAddr = finallyAddr;
                    unwound.Push(((-1, -1), TryStackType.FINALLY));
                }
                return CoverInstruction(resumeAddr, stack: unwound, throwed: true);
            }

        // not in try and not in catch (or a try with neither catch nor finally)
        default:
            coveredMap[entranceAddr] = BranchType.THROW;
            return BranchType.THROW;
    }
}

/// <summary>
/// Records the result of an abort reached in the basic block starting at <paramref name="entranceAddr"/>.
/// When the top frame is a try with a catch, or a catch with a finally, the handler code is
/// still visited through <see cref="HandleThrow"/>, because exceptions may occur at runtime.
/// </summary>
/// <param name="entranceAddr">Entrance address of the basic block being covered; its coveredMap entry receives the result.</param>
/// <param name="addr">Address of the aborting instruction, forwarded to <see cref="HandleThrow"/>.</param>
/// <param name="stack">Try stack at that instruction; only peeked at here.</param>
/// <returns><c>OK</c> when the handler path may return normally, otherwise <c>ABORT</c>.</returns>
public BranchType HandleAbort(int entranceAddr, int addr, Stack<((int returnAddr, int finallyAddr), TryStackType stackType)> stack)
{
    ((int catchAddr, int finallyAddr), TryStackType topType) = stack.Peek();
    bool inTryWithCatch = topType == TryStackType.TRY && catchAddr != -1;
    bool inCatchWithFinally = topType == TryStackType.CATCH && finallyAddr != -1;

    BranchType outcome = BranchType.ABORT;
    // HandleThrow is only invoked when a handler exists; its side effect is to cover that handler.
    if ((inTryWithCatch || inCatchWithFinally)
        && HandleThrow(entranceAddr, addr, stack) == BranchType.OK)
    {
        outcome = BranchType.OK;
    }

    coveredMap[entranceAddr] = outcome;
    return outcome;
}

/// <summary>
/// Cover the basic block starting at <paramref name="addr"/>, and recursively cover all branches.
/// </summary>
/// <param name="addr">Address of the first instruction of the basic block.</param>
/// <param name="stack">
/// Try stack at the entrance of the block. When null, a new stack holding a single ENTRY frame
/// is created; otherwise the given stack is copied, so the caller's stack is not modified.
/// </param>
/// <param name="throwed">
/// Whether an exception is pending while the block runs. In that case only a FINALLY frame
/// on top of the stack lets the block execute; otherwise THROW is returned right away.
/// </param>
/// <returns>Whether it is possible to return without exception</returns>
/// <exception cref="BadScriptException">
/// <paramref name="addr"/> (or an address reached from it) is not the address of an instruction,
/// or ENDTRY / ENDFINALLY is met without the matching frame on top of the stack.
/// </exception>
/// <exception cref="Exception">The two branches of a conditional jump returned an unexpected combination.</exception>
public BranchType CoverInstruction(int addr,
    Stack<((int returnAddr, int finallyAddr), TryStackType stackType)>? stack = null,
    bool throwed = false)
{
    int entranceAddr = addr;
    if (stack == null)
    {
        stack = new();
        stack.Push(((-1, -1), TryStackType.ENTRY));
    }
    else
        stack = CopyStack(stack);
    if (throwed && stack.Peek().stackType != TryStackType.FINALLY)
    {
        coveredMap[entranceAddr] = BranchType.THROW;
        return BranchType.THROW;
    }
    // The loop is left only through return or throw: every path either recurses into
    // another basic block, handles throw/abort, or advances to the next instruction.
    while (true)
    {
        // For the analysis of basic blocks,
        // we launched new recursion when exception is caught.
        // Here we have the exception not caught
        if (!coveredMap.TryGetValue(addr, out BranchType visited))
            throw new BadScriptException($"wrong address {addr}");
        if (visited != BranchType.UNCOVERED)
            // We have visited the code. Skip it.
            return visited;
        if (jumpTargetToSources.ContainsKey(addr) && addr != entranceAddr)
            // on target of jump, start a new recursion to split basic blocks
            return CoverInstruction(addr, stack, throwed);
        Instruction instruction = script.GetInstruction(addr);
        // NOP is neither marked as covered nor added to a basic block
        if (instruction.OpCode != OpCode.NOP)
        {
            coveredMap[addr] = BranchType.OK;
            // Add a basic block starting from entranceAddr
            if (!basicBlocks.TryGetValue(entranceAddr, out Dictionary<int, Instruction>? instructions))
            {
                instructions = new Dictionary<int, Instruction>();
                basicBlocks.Add(entranceAddr, instructions);
            }
            // Add this instruction to the basic block starting from entranceAddr
            instructions.Add(addr, instruction);
        }

        // TODO: ABORTMSG may THROW instead of ABORT. Just throw new NotImplementedException for ABORTMSG?
        if (instruction.OpCode == OpCode.ABORT || instruction.OpCode == OpCode.ABORTMSG)
            return HandleAbort(entranceAddr, addr, stack);
        if (callWithJump.Contains(instruction.OpCode))
        {
            int callTarget = ComputeJumpTarget(addr, instruction);
            // The callee is covered with a fresh try stack
            BranchType returnedType = CoverInstruction(callTarget);
            if (returnedType == BranchType.OK)
                return CoverInstruction(addr + instruction.Size, stack);
            if (returnedType == BranchType.ABORT)
                return HandleAbort(entranceAddr, addr, stack);
            if (returnedType == BranchType.THROW)
                return HandleThrow(entranceAddr, addr, stack);
        }
        if (instruction.OpCode == OpCode.RET)
        {
            // See if we are in a try. There may still be runtime exceptions
            HandleThrow(entranceAddr, addr, stack);
            coveredMap[entranceAddr] = BranchType.OK;
            return coveredMap[entranceAddr];
        }
        if (tryThrowFinally.Contains(instruction.OpCode))
        {
            if (instruction.OpCode == OpCode.TRY || instruction.OpCode == OpCode.TRY_L)
            {
                stack.Push((ComputeTryTarget(addr, instruction), TryStackType.TRY));
                return CoverInstruction(addr + instruction.Size, stack);
            }
            if (instruction.OpCode == OpCode.THROW)
                return HandleThrow(entranceAddr, addr, stack);
            if (instruction.OpCode == OpCode.ENDTRY || instruction.OpCode == OpCode.ENDTRY_L)
            {
                ((_, int finallyAddr), TryStackType stackType) = stack.Peek();
                if (stackType != TryStackType.TRY && stackType != TryStackType.CATCH)
                    throw new BadScriptException("No try stack on ENDTRY");

                // Visit catchAddr and finallyAddr because there may still be exceptions at runtime
                HandleThrow(entranceAddr, addr, stack);
                coveredMap[entranceAddr] = BranchType.OK;

                stack.Pop();
                int endPointer = ComputeJumpTarget(addr, instruction);
                if (finallyAddr != -1)
                {
                    // Run the finally block first; the end pointer is kept in the FINALLY frame
                    stack.Push(((-1, endPointer), TryStackType.FINALLY));
                    addr = finallyAddr;
                }
                else
                    addr = endPointer;
                return CoverInstruction(addr, stack, throwed);
            }
            if (instruction.OpCode == OpCode.ENDFINALLY)
            {
                if (stack.Pop().stackType != TryStackType.FINALLY)
                    throw new BadScriptException("No finally stack on ENDFINALLY");
                if (throwed)
                {
                    // For this basic block in finally, the branch type is OK
                    coveredMap[entranceAddr] = BranchType.OK;
                    // The throw is caused by previous codes
                    return BranchType.THROW;
                }
                // NOTE(review): coverage continues at the instruction following ENDFINALLY, not at
                // the end pointer stored in the popped FINALLY frame by ENDTRY - confirm this is intended.
                return CoverInstruction(addr + instruction.Size, stack, false);
            }
        }
        if (unconditionalJump.Contains(instruction.OpCode))
            // For the analysis of basic blocks, we launch a new recursion
            return CoverInstruction(ComputeJumpTarget(addr, instruction), stack, throwed);
        if (conditionalJump.Contains(instruction.OpCode) || conditionalJump_L.Contains(instruction.OpCode))
        {
            BranchType noJump = CoverInstruction(addr + instruction.Size, stack);
            BranchType jump = CoverInstruction(ComputeJumpTarget(addr, instruction), stack);
            if (noJump == BranchType.OK || jump == BranchType.OK)
            {
                // See if we are in a try. There may still be runtime exceptions
                HandleThrow(entranceAddr, addr, stack);
                coveredMap[entranceAddr] = BranchType.OK;
                return coveredMap[entranceAddr];
            }
            if (noJump == BranchType.ABORT && jump == BranchType.ABORT)
                return HandleAbort(entranceAddr, addr, stack);
            if (noJump == BranchType.THROW || jump == BranchType.THROW) // THROW, ABORT => THROW
                return HandleThrow(entranceAddr, addr, stack);
            throw new Exception($"Unknown {nameof(BranchType)} {noJump} {jump}");
        }

        addr += instruction.Size;
    }
}
}
}
51 changes: 29 additions & 22 deletions src/Neo.Compiler.CSharp/Optimizer/Analysers/JumpTarget.cs
Original file line number Diff line number Diff line change
Expand Up @@ -56,20 +56,20 @@ public static (int catchTarget, int finallyTarget) ComputeTryTarget(int addr, In

public static (Dictionary<Instruction, Instruction>,
Dictionary<Instruction, (Instruction, Instruction)>,
Dictionary<Instruction, HashSet<Instruction>>)
Dictionary<int, HashSet<int>>)
FindAllJumpAndTrySourceToTargets(NefFile nef)
{
Script script = nef.Script;
return FindAllJumpAndTrySourceToTargets(script);
}
public static (Dictionary<Instruction, Instruction>,
Dictionary<Instruction, (Instruction, Instruction)>,
Dictionary<Instruction, HashSet<Instruction>>)
Dictionary<int, HashSet<int>>)
FindAllJumpAndTrySourceToTargets(Script script) => FindAllJumpAndTrySourceToTargets(script.EnumerateInstructions().ToList());
public static (
Dictionary<Instruction, Instruction>, // jump source to target
Dictionary<Instruction, (Instruction, Instruction)>, // try source to targets
Dictionary<Instruction, HashSet<Instruction>> // target to source
Dictionary<int, HashSet<int>> // target to source
)
FindAllJumpAndTrySourceToTargets(List<(int, Instruction)> addressAndInstructionsList)
{
Expand All @@ -78,33 +78,40 @@ public static (
addressToInstruction.Add(a, i);
Dictionary<Instruction, Instruction> jumpSourceToTargets = new();
Dictionary<Instruction, (Instruction, Instruction)> trySourceToTargets = new();
Dictionary<Instruction, HashSet<Instruction>> targetToSources = new();
Dictionary<int, HashSet<int>> targetToSources = new();
foreach ((int a, Instruction i) in addressAndInstructionsList)
{
if (SingleJumpInOperand(i))
{
Instruction target = addressToInstruction[ComputeJumpTarget(a, i)];
int targetAddr = ComputeJumpTarget(a, i);
Instruction target = addressToInstruction[targetAddr];
jumpSourceToTargets.TryAdd(i, target);
if (!targetToSources.TryGetValue(target, out HashSet<Instruction>? sources)) sources = new();
sources.Add(i);
if (!targetToSources.TryGetValue(targetAddr, out HashSet<int>? sources))
{
sources = new();
targetToSources.Add(targetAddr, sources);
}
sources.Add(a);
}
if (i.OpCode == TRY)
if (i.OpCode == TRY || i.OpCode == TRY_L)
{
(Instruction t1, Instruction t2) = (addressToInstruction[a + i.TokenI8], addressToInstruction[a + i.TokenI8_1]);
(int a1, int a2) = i.OpCode == TRY ?
(a + i.TokenI8, a + i.TokenI8_1) :
(a + i.TokenI32, a + i.TokenI32_1);
(Instruction t1, Instruction t2) = (addressToInstruction[a1], addressToInstruction[a2]);
trySourceToTargets.TryAdd(i, (t1, t2));
if (!targetToSources.TryGetValue(t1, out HashSet<Instruction>? sources1)) sources1 = new();
sources1.Add(i);
if (!targetToSources.TryGetValue(t2, out HashSet<Instruction>? sources2)) sources2 = new();
sources2.Add(i);
}
if (i.OpCode == TRY_L)
{
(Instruction t1, Instruction t2) = (addressToInstruction[a + i.TokenI32], addressToInstruction[a + i.TokenI32_1]);
trySourceToTargets.TryAdd(i, (t1, t2));
if (!targetToSources.TryGetValue(t1, out HashSet<Instruction>? sources1)) sources1 = new();
sources1.Add(i);
if (!targetToSources.TryGetValue(t2, out HashSet<Instruction>? sources2)) sources2 = new();
sources2.Add(i);
if (!targetToSources.TryGetValue(a1, out HashSet<int>? sources1))
{
sources1 = new();
targetToSources.Add(a1, sources1);
}
sources1.Add(a);
if (!targetToSources.TryGetValue(a1, out HashSet<int>? sources2))
{
sources2 = new();
targetToSources.Add(a2, sources2);
}
sources2.Add(a);
}
}
return (jumpSourceToTargets, trySourceToTargets, targetToSources);
Expand Down
Loading
Loading