Skip to content

Commit

Permalink
refactor for multi cpu architecture support (#87)
Browse files Browse the repository at this point in the history
  • Loading branch information
cornelk authored Dec 22, 2024
1 parent eb6a5c2 commit 26e3a4b
Show file tree
Hide file tree
Showing 41 changed files with 1,331 additions and 814 deletions.
1 change: 0 additions & 1 deletion .golangci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ linters:
- grouper # An analyzer to analyze expression groups
- ineffassign # Detects when assignments to existing variables are not used
- intrange # finds places where for loops could make use of an integer range.
- ireturn # accept Interfaces, Return Concrete Types
- maintidx # measures the maintainability index of each function
- makezero # Finds slice declarations with non-zero initial length
- mirror # reports wrong mirror patterns of bytes/strings usage
Expand Down
37 changes: 37 additions & 0 deletions internal/arch/arch.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
// Package arch contains types and functions used for multi architecture support.
// It acts as a bridge between the disassembler and the architecture specific code.
package arch

// Architecture contains architecture specific information.
// It is the set of operations that the CPU architecture specific code
// offers to the disassembler.
type Architecture interface {
	// Constants returns the constants translation map, keyed by address.
	Constants() (map[uint16]ConstTranslation, error)
	// GetAddressingParam returns the address of the param if it references an address.
	// The returned bool is false if the param does not reference an address.
	GetAddressingParam(param any) (uint16, bool)
	// HandleDisambiguousInstructions translates ambiguous instructions, meaning
	// instructions that have multiple opcodes for the same addressing mode, into
	// data bytes. Assembling such an instruction can result in different bytes
	// and make the resulting ROM not match the original.
	// It returns whether the instruction at the address was translated.
	HandleDisambiguousInstructions(dis Disasm, address uint16, offsetInfo Offset) bool
	// Initialize initializes the architecture.
	Initialize(dis Disasm) error
	// IsAddressingIndexed returns whether the opcode is using indexed addressing.
	IsAddressingIndexed(opcode Opcode) bool
	// LastCodeAddress returns the last possible address of code.
	// This is used in systems where the last address is reserved for
	// the interrupt vector table.
	LastCodeAddress() uint16
	// ProcessOffset processes an offset. It returns whether the offset was
	// processed and an error if any occurred.
	ProcessOffset(dis Disasm, address uint16, offsetInfo Offset) (bool, error)
	// ProcessVarUsage processes the variable usage of an offset.
	ProcessVarUsage(offsetInfo Offset, reference string) error
	// ReadOpParam reads the parameter of an opcode.
	// NOTE(review): the byte slice appears to hold the raw parameter bytes that
	// were read (callers decode it as a little endian word) - confirm against
	// the implementations.
	ReadOpParam(dis Disasm, addressing int, address uint16) (any, []byte, error)
}

// ConstTranslation represents a constant translation from a read and write operation to a name.
// Read and Write can differ, as the same address can be known under different
// names depending on the access mode. An empty name means that no constant is
// known for that access mode.
type ConstTranslation struct {
	// Address is the address that the constant names refer to.
	Address uint16

	// Read is the constant name to use when the address is read.
	Read string
	// Write is the constant name to use when the address is written.
	Write string
}
62 changes: 62 additions & 0 deletions internal/arch/disasm.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
package arch

import (
"github.com/retroenv/nesgodisasm/internal/options"
"github.com/retroenv/nesgodisasm/internal/program"
"github.com/retroenv/retrogolib/arch/nes/cartridge"
"github.com/retroenv/retrogolib/log"
)

// Disasm represents a disassembler.
// It is the view of the disassembler that is handed to the architecture
// specific code and embeds the jump engine helpers.
type Disasm interface {
	JumpEngine

	// AddAddressToParse adds an address to the list to be processed if the address has not been processed yet.
	AddAddressToParse(address, context, from uint16, currentInstruction Instruction, isABranchDestination bool)
	// AddVariableReference adds a variable reference if the opcode is accessing
	// the given address directly by reading or writing.
	AddVariableReference(addressReference, usageAddress uint16, opcode Opcode, forceVariableUsage bool)
	// Cart returns the loaded cartridge.
	Cart() *cartridge.Cartridge
	// ChangeAddressRangeToCodeAsData marks a range of code addresses as
	// code-as-data. It combines all data bytes that are not split by a label.
	ChangeAddressRangeToCodeAsData(address uint16, data []byte)
	// Logger returns the logger.
	Logger() *log.Logger
	// OffsetInfo returns the offset information for the given address.
	OffsetInfo(address uint16) Offset
	// Options returns the disassembler options.
	Options() options.Disassembler
	// ProgramCounter returns the current program counter of the execution tracer.
	ProgramCounter() uint16
	// ReadMemory reads a byte from the memory at the given address.
	ReadMemory(address uint16) (byte, error)
	// ReadMemoryWord reads a word from the memory at the given address.
	ReadMemoryWord(address uint16) (uint16, error)
	// ReplaceParamByConstant replaces the parameter of an instruction by a constant name
	// if the address of the instruction is found in the constants map.
	// The returned bool reports whether a replacement took place.
	ReplaceParamByConstant(address uint16, opcode Opcode, paramAsString string) (string, bool)
	// SetCodeBaseAddress sets the code base address.
	SetCodeBaseAddress(address uint16)
	// SetHandlers sets the program vector handlers.
	SetHandlers(handlers program.Handlers)
	// SetVectorsStartAddress sets the start address of the vectors.
	SetVectorsStartAddress(address uint16)
}

// JumpEngine contains jump engine related helpers.
// A jump engine is a function that ends in an indirect jump and uses a table
// of function addresses to select the jump destination.
type JumpEngine interface {
	// AddJumpEngine adds a jump engine function address to the list of jump engines.
	AddJumpEngine(address uint16)
	// GetContextDataReferences parses all instructions of the function context until the jump
	// and returns data references that could point to the function table.
	GetContextDataReferences(offsets []Offset, addresses []uint16) ([]uint16, error)
	// GetFunctionTableReference detects a jump engine function context and its function table.
	GetFunctionTableReference(context uint16, dataReferences []uint16)
	// HandleJumpEngineDestination processes a newly detected jump engine destination.
	HandleJumpEngineDestination(caller, destination uint16) error
	// HandleJumpEngineCallers processes all callers of a newly detected jump engine function.
	HandleJumpEngineCallers(context uint16) error
	// JumpContextInfo builds the list of instructions of the current function context.
	// It returns the offsets of the instructions and their addresses.
	JumpContextInfo(jumpAddress uint16, offsetInfo Offset) ([]Offset, []uint16)
}
13 changes: 13 additions & 0 deletions internal/arch/instruction.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
package arch

// Instruction represents a CPU instruction.
// It abstracts the architecture specific instruction type so that the
// disassembler can query it without knowing the CPU.
type Instruction interface {
	// IsCall returns true if the instruction is a call.
	IsCall() bool
	// IsNil returns true if the instruction is nil, meaning that no
	// instruction is wrapped by the implementation.
	IsNil() bool
	// Name returns the instruction name.
	Name() string
	// Unofficial returns true if the instruction is not official.
	Unofficial() bool
}
45 changes: 45 additions & 0 deletions internal/arch/m6502/addressing.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
package m6502

import (
"github.com/retroenv/nesgodisasm/internal/arch"
"github.com/retroenv/retrogolib/arch/cpu/m6502"
)

// GetAddressingParam extracts the address from an instruction parameter.
// Parameters of the absolute, indirect and zero page addressing types,
// including their X and Y variants, carry an address. For any other
// parameter type 0 and false are returned.
func (ar *Arch6502) GetAddressingParam(param any) (uint16, bool) {
	var address uint16

	// every addressing type needs its own case, as the typed value is only
	// convertible when the case lists a single type
	switch typed := param.(type) {
	case m6502.Absolute:
		address = uint16(typed)
	case m6502.AbsoluteX:
		address = uint16(typed)
	case m6502.AbsoluteY:
		address = uint16(typed)
	case m6502.Indirect:
		address = uint16(typed)
	case m6502.IndirectX:
		address = uint16(typed)
	case m6502.IndirectY:
		address = uint16(typed)
	case m6502.ZeroPage:
		address = uint16(typed)
	case m6502.ZeroPageX:
		address = uint16(typed)
	case m6502.ZeroPageY:
		address = uint16(typed)
	default:
		return 0, false
	}

	return address, true
}

// IsAddressingIndexed reports whether the addressing mode of the opcode is one
// of the X or Y indexed modes: zero page, absolute or indirect.
func (ar *Arch6502) IsAddressingIndexed(opcode arch.Opcode) bool {
	mode := m6502.AddressingMode(opcode.Addressing())

	zeroPageIndexed := mode == m6502.ZeroPageXAddressing || mode == m6502.ZeroPageYAddressing
	absoluteIndexed := mode == m6502.AbsoluteXAddressing || mode == m6502.AbsoluteYAddressing
	indirectIndexed := mode == m6502.IndirectXAddressing || mode == m6502.IndirectYAddressing

	return zeroPageIndexed || absoluteIndexed || indirectIndexed
}
37 changes: 37 additions & 0 deletions internal/arch/m6502/code.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
package m6502

import (
"github.com/retroenv/nesgodisasm/internal/arch"
"github.com/retroenv/nesgodisasm/internal/program"
"github.com/retroenv/retrogolib/arch/cpu/m6502"
)

// HandleDisambiguousInstructions converts an ambiguous instruction into data bytes.
// Ambiguous instructions have multiple opcodes for the same addressing mode, so
// assembling them again can produce different bytes and a ROM that does not
// match the original.
//
// Only unofficial instructions below the interrupt vector start address are
// considered. Unofficial nop and sbc instructions are always converted, all other
// unofficial instructions only if the NoUnofficialInstructions option is set.
// It returns true if the instruction has been converted.
func (ar *Arch6502) HandleDisambiguousInstructions(dis arch.Disasm, address uint16, offsetInfo arch.Offset) bool {
	ins := offsetInfo.Opcode().Instruction()
	if !ins.Unofficial() {
		return false
	}
	if address >= m6502.InterruptVectorStartAddress {
		return false
	}

	opts := dis.Options()
	name := ins.Name()
	alwaysConverted := name == m6502.Nop.Name || name == m6502.Sbc.Name
	if !alwaysConverted && !opts.NoUnofficialInstructions {
		return false
	}

	// keep the original instruction visible as comment; the code is empty in case
	// of a detected branch into an unofficial nop instruction, the existing
	// comment is kept in that case
	description := offsetInfo.Code()
	if description == "" {
		description = offsetInfo.Comment()
	}
	offsetInfo.SetComment("disambiguous instruction: " + description)

	offsetInfo.SetCode("")
	offsetInfo.SetType(program.CodeAsData)
	dis.ChangeAddressRangeToCodeAsData(address, offsetInfo.Data())
	return true
}
49 changes: 49 additions & 0 deletions internal/arch/m6502/const.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
package m6502

import (
"fmt"

"github.com/retroenv/nesgodisasm/internal/arch"
"github.com/retroenv/retrogolib/arch/cpu/m6502"
"github.com/retroenv/retrogolib/arch/nes/register"
)

// Constants returns a map from address to constant names that combines the
// known NES constants of the APU, controller and PPU registers.
// An error is returned if a name for the same address and access mode is
// defined more than once.
func (ar *Arch6502) Constants() (map[uint16]arch.ConstTranslation, error) {
	// the sources are merged in this order; the label is only used for error context
	sources := []struct {
		label string
		names map[uint16]m6502.AccessModeConstant
	}{
		{label: "apu", names: register.APUAddressToName},
		{label: "controller", names: register.ControllerAddressToName},
		{label: "ppu", names: register.PPUAddressToName},
	}

	translations := make(map[uint16]arch.ConstTranslation)
	for _, src := range sources {
		if err := mergeConstantsMaps(translations, src.names); err != nil {
			return nil, fmt.Errorf("processing %s constants: %w", src.label, err)
		}
	}
	return translations, nil
}

// mergeConstantsMaps adds the constants of source to destination.
// The constant name is stored as read name and/or write name of the address,
// depending on the access mode flags of the constant. An error is returned
// if destination already contains a name for the same address and access mode;
// entries merged before the conflict was found stay in destination.
func mergeConstantsMaps(destination map[uint16]arch.ConstTranslation, source map[uint16]m6502.AccessModeConstant) error {
	for addr, info := range source {
		entry := destination[addr]
		entry.Address = addr

		readable := info.Mode&m6502.ReadAccess != 0
		writable := info.Mode&m6502.WriteAccess != 0

		// a read conflict is reported before a write conflict
		switch {
		case readable && entry.Read != "":
			return fmt.Errorf("constant with address 0x%04X and read mode is defined twice", addr)
		case writable && entry.Write != "":
			return fmt.Errorf("constant with address 0x%04X and write mode is defined twice", addr)
		}

		if readable {
			entry.Read = info.Constant
		}
		if writable {
			entry.Write = info.Constant
		}

		destination[addr] = entry
	}
	return nil
}
33 changes: 33 additions & 0 deletions internal/arch/m6502/instruction.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
package m6502

import (
"github.com/retroenv/nesgodisasm/internal/arch"
"github.com/retroenv/retrogolib/arch/cpu/m6502"
)

// The methods use value receivers, asserting the value type covers the
// pointer type as well.
var _ arch.Instruction = Instruction{}

// Instruction represents a 6502 CPU instruction.
// It wraps a pointer to the instruction definition. The pointer can be nil,
// which IsNil reports; all other methods are safe to call in that state and
// return the zero value of their result type.
type Instruction struct {
	ins *m6502.Instruction
}

// IsCall returns true if the instruction is a call, which is the jsr
// instruction for the 6502. It returns false for a nil instruction.
func (i Instruction) IsCall() bool {
	return i.ins != nil && i.ins.Name == m6502.Jsr.Name
}

// IsNil returns true if the instruction is nil.
func (i Instruction) IsNil() bool {
	return i.ins == nil
}

// Name returns the instruction name.
// It returns an empty string for a nil instruction.
func (i Instruction) Name() string {
	if i.ins == nil {
		return ""
	}
	return i.ins.Name
}

// Unofficial returns true if the instruction is not official.
// It returns false for a nil instruction.
func (i Instruction) Unofficial() bool {
	return i.ins != nil && i.ins.Unofficial
}
75 changes: 75 additions & 0 deletions internal/arch/m6502/jumpengine.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
package m6502

import (
"encoding/binary"
"fmt"

"github.com/retroenv/nesgodisasm/internal/arch"
"github.com/retroenv/retrogolib/arch/cpu/m6502"
"github.com/retroenv/retrogolib/log"
)

const (
	// jumpEngineMaxContextSize is the exclusive upper limit for the size of a
	// jump engine function context. The callers of a detected jump engine are
	// only processed if the context size, calculated as the address of the
	// jump minus the context start address plus 3, is below this value.
	jumpEngineMaxContextSize = 0x25
)

// checkForJumpEngineJmp detects the indirect jmp instruction that ends a jump engine function.
// The function offsets after the call to the jump engine will be used as destinations to
// disassemble as code. This can be found in some official games like Super Mario Bros.
//
// Instructions other than an indirect jmp are ignored. For a detected jump engine the
// function context is registered and, if the context is smaller than
// jumpEngineMaxContextSize, all of its callers are processed. Larger contexts only
// get a comment set on the offset.
func (ar *Arch6502) checkForJumpEngineJmp(dis arch.Disasm, jumpAddress uint16, offsetInfo arch.Offset) error {
	ins := offsetInfo.Opcode().Instruction()
	mode := m6502.AddressingMode(offsetInfo.Opcode().Addressing())
	isIndirectJmp := ins.Name() == m6502.Jmp.Name && mode == m6502.IndirectAddressing
	if !isIndirectJmp {
		return nil
	}

	offsets, addresses := dis.JumpContextInfo(jumpAddress, offsetInfo)
	// NOTE(review): the added 3 is presumably the size of the jmp instruction - confirm
	size := jumpAddress - offsetInfo.Context() + 3
	references, err := dis.GetContextDataReferences(offsets, addresses)
	if err != nil {
		return fmt.Errorf("getting context data references: %w", err)
	}

	if len(references) > 1 {
		dis.GetFunctionTableReference(offsetInfo.Context(), references)
	}

	dis.Logger().Debug("Jump engine detected",
		log.String("address", fmt.Sprintf("0x%04X", jumpAddress)),
		log.Uint16("code_size", size),
	)

	// reaching this point means that no branching instructions beside the final
	// indirect jmp have been found in the function, which makes it likely to be
	// a jump engine
	dis.AddJumpEngine(offsetInfo.Context())

	if size >= jumpEngineMaxContextSize {
		offsetInfo.SetComment("jump engine detected")
		return nil
	}

	if err := dis.HandleJumpEngineCallers(offsetInfo.Context()); err != nil {
		return fmt.Errorf("handling jump engine callers: %w", err)
	}
	return nil
}

// checkForJumpEngineCall checks if the current instruction is a call into a jump engine function.
// Only jsr instructions with absolute addressing are considered. The call destination is
// read from the parameter bytes at the current program counter and passed, together with
// the address of the caller, to the jump engine destination handling of the disassembler.
//
// An error is returned if the parameter can not be read, if it is too short to contain
// a destination address or if the destination handling fails.
func (ar *Arch6502) checkForJumpEngineCall(dis arch.Disasm, address uint16, offsetInfo arch.Offset) error {
	instruction := offsetInfo.Opcode().Instruction()
	addressing := m6502.AddressingMode(offsetInfo.Opcode().Addressing())
	if instruction.Name() != m6502.Jsr.Name || addressing != m6502.AbsoluteAddressing {
		return nil
	}

	pc := dis.ProgramCounter()
	_, opcodes, err := ar.ReadOpParam(dis, offsetInfo.Opcode().Addressing(), pc)
	if err != nil {
		return fmt.Errorf("reading opcode parameter: %w", err)
	}

	// decoding a word from less than 2 bytes would panic
	if len(opcodes) < 2 {
		return fmt.Errorf("reading jump engine call destination: expected 2 parameter bytes, got %d", len(opcodes))
	}

	destination := binary.LittleEndian.Uint16(opcodes)
	if err := dis.HandleJumpEngineDestination(address, destination); err != nil {
		return fmt.Errorf("handling jump engine destination: %w", err)
	}
	return nil
}
Loading

0 comments on commit 26e3a4b

Please sign in to comment.