diff --git a/internal/arch/disasm.go b/internal/arch/disasm.go index 6f18378..fb8046e 100644 --- a/internal/arch/disasm.go +++ b/internal/arch/disasm.go @@ -9,8 +9,6 @@ import ( // Disasm represents a disassembler. type Disasm interface { - JumpEngine - // AddAddressToParse adds an address to the list to be processed if the address has not been processed yet. AddAddressToParse(address, context, from uint16, currentInstruction Instruction, isABranchDestination bool) // AddVariableReference adds a variable reference if the opcode is accessing @@ -21,8 +19,14 @@ type Disasm interface { // ChangeAddressRangeToCodeAsData sets a range of code address to code as // data types. It combines all data bytes that are not split by a label. ChangeAddressRangeToCodeAsData(address uint16, data []byte) + // CodeBaseAddress returns the code base address. + CodeBaseAddress() uint16 // Constants returns the constants manager. Constants() ConstantManager + // DeleteFunctionReturnToParse deletes a function return address from the list of addresses to parse. + DeleteFunctionReturnToParse(address uint16) + // JumpEngine returns the jump engine. + JumpEngine() JumpEngine // Logger returns the logger. Logger() *log.Logger // OffsetInfo returns the offset information for the given address. @@ -48,20 +52,3 @@ type ConstantManager interface { // if the address of the instruction is found in the constants map. ReplaceParameter(address uint16, opcode Opcode, paramAsString string) (string, bool) } - -// JumpEngine contains jump engine related helper. -type JumpEngine interface { - // AddJumpEngine adds a jump engine function address to the list of jump engines. - AddJumpEngine(address uint16) - // GetContextDataReferences parse all instructions of the function context until the jump - // and returns data references that could point to the function table. - GetContextDataReferences(offsets []Offset, addresses []uint16) ([]uint16, error) - // GetFunctionTableReference detects a jump engine function context and its function table. - GetFunctionTableReference(context uint16, dataReferences []uint16) - // HandleJumpEngineDestination processes a newly detected jump engine destination. - HandleJumpEngineDestination(caller, destination uint16) error - // HandleJumpEngineCallers processes all callers of a newly detected jump engine function. - HandleJumpEngineCallers(context uint16) error - // JumpContextInfo builds the list of instructions of the current function context. - JumpContextInfo(jumpAddress uint16, offsetInfo Offset) ([]Offset, []uint16) -} diff --git a/internal/arch/jumpengine.go b/internal/arch/jumpengine.go new file mode 100644 index 0000000..d80ccd5 --- /dev/null +++ b/internal/arch/jumpengine.go @@ -0,0 +1,21 @@ +package arch + +// JumpEngine contains jump engine related helper. +type JumpEngine interface { + // AddJumpEngine adds a jump engine function address to the list of jump engines. + AddJumpEngine(address uint16) + // GetContextDataReferences parse all instructions of the function context until the jump + // and returns data references that could point to the function table. + GetContextDataReferences(dis Disasm, offsets []Offset, addresses []uint16) ([]uint16, error) + // GetFunctionTableReference detects a jump engine function context and its function table. + GetFunctionTableReference(context uint16, dataReferences []uint16) + // HandleJumpEngineDestination processes a newly detected jump engine destination. + HandleJumpEngineDestination(dis Disasm, caller, destination uint16) error + // HandleJumpEngineCallers processes all callers of a newly detected jump engine function. + HandleJumpEngineCallers(dis Disasm, context uint16) error + // JumpContextInfo builds the list of instructions of the current function context. + JumpContextInfo(dis Disasm, jumpAddress uint16, offsetInfo Offset) ([]Offset, []uint16) + // ScanForNewJumpEngineEntry scans all jump engine calls for an unprocessed entry in the function address table that + // follows the call. It returns whether a new address to parse was added. + ScanForNewJumpEngineEntry(dis Disasm) (bool, error) +} diff --git a/internal/arch/m6502/jumpengine.go b/internal/arch/m6502/jumpengine.go index fe3b4a0..161e93b 100644 --- a/internal/arch/m6502/jumpengine.go +++ b/internal/arch/m6502/jumpengine.go @@ -23,15 +23,16 @@ func (ar *Arch6502) checkForJumpEngineJmp(dis arch.Disasm, jumpAddress uint16, o return nil } - contextOffsets, contextAddresses := dis.JumpContextInfo(jumpAddress, offsetInfo) + jumpEngine := dis.JumpEngine() + contextOffsets, contextAddresses := jumpEngine.JumpContextInfo(dis, jumpAddress, offsetInfo) contextSize := jumpAddress - offsetInfo.Context() + 3 - dataReferences, err := dis.GetContextDataReferences(contextOffsets, contextAddresses) + dataReferences, err := jumpEngine.GetContextDataReferences(dis, contextOffsets, contextAddresses) if err != nil { return fmt.Errorf("getting context data references: %w", err) } if len(dataReferences) > 1 { - dis.GetFunctionTableReference(offsetInfo.Context(), dataReferences) + jumpEngine.GetFunctionTableReference(offsetInfo.Context(), dataReferences) } dis.Logger().Debug("Jump engine detected", @@ -41,10 +42,10 @@ func (ar *Arch6502) checkForJumpEngineJmp(dis arch.Disasm, jumpAddress uint16, o // if code reaches this point, no branching instructions beside the final indirect jmp have been found // in the function, this makes it likely a jump engine - dis.AddJumpEngine(offsetInfo.Context()) + jumpEngine.AddJumpEngine(offsetInfo.Context()) if contextSize < jumpEngineMaxContextSize { - if err := dis.HandleJumpEngineCallers(offsetInfo.Context()); err != nil { + if err := jumpEngine.HandleJumpEngineCallers(dis, offsetInfo.Context()); err != nil { return fmt.Errorf("handling jump engine callers: %w", err) } return nil @@ -67,8 +68,9 @@ func (ar *Arch6502) checkForJumpEngineCall(dis arch.Disasm, address uint16, offs return err } + jumpEngine := dis.JumpEngine() destination := binary.LittleEndian.Uint16(opcodes) - if err := dis.HandleJumpEngineDestination(address, destination); err != nil { + if err := jumpEngine.HandleJumpEngineDestination(dis, address, destination); err != nil { return fmt.Errorf("handling jump engine destination: %w", err) } return nil diff --git a/internal/arch/offset.go b/internal/arch/offset.go index cd25ae1..2f262b8 100644 --- a/internal/arch/offset.go +++ b/internal/arch/offset.go @@ -4,6 +4,8 @@ import "github.com/retroenv/nesgodisasm/internal/program" // Offset represents an offset in the disassembled code. type Offset interface { + // BranchFrom returns the list of addresses that branch to this offset. + BranchFrom() []uint16 // ClearType clears the offset type. ClearType(offsetType program.OffsetType) // Code returns the code string of the offset. @@ -32,8 +34,12 @@ type Offset interface { SetData([]byte) // SetLabel sets the label of the offset. SetLabel(string) + // SetLabelComment sets the label comment of the offset. + SetLabelComment(string) // SetOpcode sets the opcode of the offset. SetOpcode(Opcode) // SetType sets the offset type. SetType(offsetType program.OffsetType) + // Type returns the offset type. + Type() program.OffsetType } diff --git a/internal/disasm.go b/internal/disasm.go index 0e749de..42ab78a 100644 --- a/internal/disasm.go +++ b/internal/disasm.go @@ -10,6 +10,7 @@ import ( "github.com/retroenv/nesgodisasm/internal/arch" "github.com/retroenv/nesgodisasm/internal/assembler" "github.com/retroenv/nesgodisasm/internal/consts" + "github.com/retroenv/nesgodisasm/internal/jumpengine" "github.com/retroenv/nesgodisasm/internal/options" "github.com/retroenv/nesgodisasm/internal/program" "github.com/retroenv/nesgodisasm/internal/writer" @@ -38,14 +39,13 @@ type Disasm struct { codeBaseAddress uint16 // codebase address of the cartridge, it is not always 0x8000 vectorsStartAddress uint16 + jumpEngine *jumpengine.JumpEngine + constants *consts.Consts variables map[uint16]*variable usedVariables map[uint16]struct{} - jumpEngines map[uint16]struct{} // set of all jump engine functions addresses - jumpEngineCallers []*jumpEngineCaller // jump engine caller tables to process - jumpEngineCallersAdded map[uint16]*jumpEngineCaller - branchDestinations map[uint16]struct{} // set of all addresses that are branched to + branchDestinations map[uint16]struct{} // set of all addresses that are branched to // TODO handle bank switch offsetsToParse []uint16 @@ -71,12 +71,11 @@ func New(ar arch.Architecture, logger *log.Logger, cart *cartridge.Cartridge, fileWriterConstructor: fileWriterConstructor, variables: map[uint16]*variable{}, usedVariables: map[uint16]struct{}{}, - jumpEngineCallersAdded: map[uint16]*jumpEngineCaller{}, - jumpEngines: map[uint16]struct{}{}, branchDestinations: map[uint16]struct{}{}, offsetsToParseAdded: map[uint16]struct{}{}, offsetsParsed: map[uint16]struct{}{}, functionReturnsToParseAdded: map[uint16]struct{}{}, + jumpEngine: jumpengine.New(ar), } var err error @@ -148,6 +147,10 @@ func (dis *Disasm) SetHandlers(handlers program.Handlers) { dis.handlers = handlers } +func (dis *Disasm) CodeBaseAddress() uint16 { + return dis.codeBaseAddress +} + func (dis *Disasm) SetCodeBaseAddress(address uint16) { dis.codeBaseAddress = address @@ -168,6 +171,11 @@ func (dis *Disasm) Constants() arch.ConstantManager { return dis.constants } +// JumpEngine returns the jump engine. +func (dis *Disasm) JumpEngine() arch.JumpEngine { + return dis.jumpEngine +} + // converts the internal disassembly representation to a program type that will be used by // the chosen assembler output instance to generate the asm file. func (dis *Disasm) convertToProgram() (*program.Program, error) { diff --git a/internal/jumpengine.go b/internal/jumpengine/jumpengine.go similarity index 61% rename from internal/jumpengine.go rename to internal/jumpengine/jumpengine.go index 064cf11..4dd3b5b 100644 --- a/internal/jumpengine.go +++ b/internal/jumpengine/jumpengine.go @@ -1,4 +1,5 @@ -package disasm +// Package jumpengine provides jump engine detection and processing. +package jumpengine import ( "fmt" @@ -8,6 +9,8 @@ import ( "github.com/retroenv/retrogolib/log" ) +var _ arch.JumpEngine = &JumpEngine{} + const jumpEngineLastInstructionsCheck = 16 // jumpEngineCaller stores info about a caller of a jump engine, which is followed by a list of function addresses @@ -17,15 +20,32 @@ type jumpEngineCaller struct { tableStartAddress uint16 } +type JumpEngine struct { + arch arch.Architecture + + jumpEngines map[uint16]struct{} // set of all jump engine functions addresses + jumpEngineCallers []*jumpEngineCaller // jump engine caller tables to process + jumpEngineCallersAdded map[uint16]*jumpEngineCaller +} + +func New(ar arch.Architecture) *JumpEngine { + return &JumpEngine{ + arch: ar, + jumpEngines: map[uint16]struct{}{}, + jumpEngineCallers: []*jumpEngineCaller{}, + jumpEngineCallersAdded: map[uint16]*jumpEngineCaller{}, + } +} + // AddJumpEngine adds a jump engine function address to the list of jump engines. -func (dis *Disasm) AddJumpEngine(address uint16) { - dis.jumpEngines[address] = struct{}{} +func (j *JumpEngine) AddJumpEngine(address uint16) { + j.jumpEngines[address] = struct{}{} } // GetFunctionTableReference detects a jump engine function context and its function table. // TODO use jump address as key to be able to handle large function // contexts containing multiple jump engines -func (dis *Disasm) GetFunctionTableReference(context uint16, dataReferences []uint16) { +func (j *JumpEngine) GetFunctionTableReference(context uint16, dataReferences []uint16) { // if there are multiple data references just look at the last 2 if len(dataReferences) > 2 { dataReferences = dataReferences[len(dataReferences)-2:] @@ -48,15 +68,18 @@ func (dis *Disasm) GetFunctionTableReference(context uint16, dataReferences []ui } jumpEngine := &jumpEngineCaller{} - dis.jumpEngineCallersAdded[context] = jumpEngine - dis.jumpEngineCallers = append(dis.jumpEngineCallers, jumpEngine) + j.jumpEngineCallersAdded[context] = jumpEngine + j.jumpEngineCallers = append(j.jumpEngineCallers, jumpEngine) - dis.jumpEngineCallersAdded[context].tableStartAddress = smallestReference + j.jumpEngineCallersAdded[context].tableStartAddress = smallestReference } // GetContextDataReferences parse all instructions of the function context until the jump // and returns data references that could point to the function table. -func (dis *Disasm) GetContextDataReferences(offsets []arch.Offset, addresses []uint16) ([]uint16, error) { +func (j *JumpEngine) GetContextDataReferences(dis arch.Disasm, offsets []arch.Offset, + addresses []uint16) ([]uint16, error) { + + codeBaseAddress := dis.CodeBaseAddress() var dataReferences []uint16 for i, offsetInfoInstruction := range offsets { @@ -69,13 +92,13 @@ func (dis *Disasm) GetContextDataReferences(offsets []arch.Offset, addresses []u continue } - param, _, err := dis.arch.ReadOpParam(dis, opcode.Addressing(), address) + param, _, err := j.arch.ReadOpParam(dis, opcode.Addressing(), address) if err != nil { return nil, fmt.Errorf("reading opcode parameters: %w", err) } - reference, ok := dis.arch.GetAddressingParam(param) - if ok && reference >= dis.codeBaseAddress && reference < dis.arch.LastCodeAddress() { + reference, ok := j.arch.GetAddressingParam(param) + if ok && reference >= codeBaseAddress && reference < j.arch.LastCodeAddress() { dataReferences = append(dataReferences, reference) } } @@ -85,12 +108,12 @@ func (dis *Disasm) GetContextDataReferences(offsets []arch.Offset, addresses []u // JumpContextInfo builds the list of instructions of the current function context. // in some ROMs the jump engine can be part of a label inside a larger function, // the jump engine detection will use the last instructions before the jmp. -func (dis *Disasm) JumpContextInfo(jumpAddress uint16, offsetInfo arch.Offset) ([]arch.Offset, []uint16) { +func (j *JumpEngine) JumpContextInfo(dis arch.Disasm, jumpAddress uint16, offsetInfo arch.Offset) ([]arch.Offset, []uint16) { var offsets []arch.Offset var addresses []uint16 for address := offsetInfo.Context(); address != 0 && address < jumpAddress; { - offsetInfoInstruction := dis.mapper.offsetInfo(address) + offsetInfoInstruction := dis.OffsetInfo(address) // skip offsets that have not been processed yet if len(offsetInfoInstruction.Data()) == 0 { @@ -113,23 +136,24 @@ func (dis *Disasm) JumpContextInfo(jumpAddress uint16, offsetInfo arch.Offset) ( } // HandleJumpEngineDestination processes a newly detected jump engine destination. -func (dis *Disasm) HandleJumpEngineDestination(caller, destination uint16) error { - for addr := range dis.jumpEngines { +func (j *JumpEngine) HandleJumpEngineDestination(dis arch.Disasm, caller, destination uint16) error { + for addr := range j.jumpEngines { if addr == destination { - return dis.HandleJumpEngineCallers(caller) + return j.HandleJumpEngineCallers(dis, caller) } } return nil } // HandleJumpEngineCallers processes all callers of a newly detected jump engine function. -func (dis *Disasm) HandleJumpEngineCallers(context uint16) error { - offsetInfo := dis.mapper.offsetInfo(context) - offsetInfo.LabelComment = "jump engine detected" +func (j *JumpEngine) HandleJumpEngineCallers(dis arch.Disasm, context uint16) error { + offsetInfo := dis.OffsetInfo(context) + offsetInfo.SetLabelComment("jump engine detected") offsetInfo.SetType(program.JumpEngine) - for _, bankRef := range offsetInfo.branchFrom { - if err := dis.handleJumpEngineCaller(bankRef.address); err != nil { + branchFrom := offsetInfo.BranchFrom() + for _, address := range branchFrom { + if err := j.handleJumpEngineCaller(dis, address); err != nil { return err } } @@ -138,29 +162,29 @@ func (dis *Disasm) HandleJumpEngineCallers(context uint16) error { // handleJumpEngineCaller processes a newly detected jump engine caller, the return address of the call is // marked as function reference instead of code. The first entry of the function table is processed. -func (dis *Disasm) handleJumpEngineCaller(caller uint16) error { - jumpEngine, ok := dis.jumpEngineCallersAdded[caller] +func (j *JumpEngine) handleJumpEngineCaller(dis arch.Disasm, caller uint16) error { + jumpEngine, ok := j.jumpEngineCallersAdded[caller] if !ok { jumpEngine = &jumpEngineCaller{} - dis.jumpEngineCallersAdded[caller] = jumpEngine - dis.jumpEngineCallers = append(dis.jumpEngineCallers, jumpEngine) + j.jumpEngineCallersAdded[caller] = jumpEngine + j.jumpEngineCallers = append(j.jumpEngineCallers, jumpEngine) } // get the address of the function pointers after the jump engine call - offsetInfo := dis.mapper.offsetInfo(caller) + offsetInfo := dis.OffsetInfo(caller) address := caller + uint16(len(offsetInfo.Data())) // remove from code that should be parsed - delete(dis.functionReturnsToParseAdded, address) + dis.DeleteFunctionReturnToParse(address) jumpEngine.tableStartAddress = address - _, err := dis.processJumpEngineEntry(address, jumpEngine) + _, err := j.processJumpEngineEntry(dis, address, jumpEngine) return err } // processJumpEngineEntry processes a potential function reference in a jump engine table. // It returns whether the entry was a valid function reference address and has been added for processing. -func (dis *Disasm) processJumpEngineEntry(address uint16, jumpEngine *jumpEngineCaller) (bool, error) { +func (j *JumpEngine) processJumpEngineEntry(dis arch.Disasm, address uint16, jumpEngine *jumpEngineCaller) (bool, error) { if jumpEngine.terminated { return false, nil } @@ -168,36 +192,37 @@ func (dis *Disasm) processJumpEngineEntry(address uint16, jumpEngine *jumpEngine // verify that the destination is in valid code address range destination, err := dis.ReadMemoryWord(address) if err != nil { - return false, err + return false, fmt.Errorf("reading memory word: %w", err) } - if destination < dis.codeBaseAddress || destination >= dis.arch.LastCodeAddress() { + codeBaseAddress := dis.CodeBaseAddress() + if destination < codeBaseAddress || destination >= j.arch.LastCodeAddress() { jumpEngine.terminated = true return false, nil } - offsetInfo1 := dis.mapper.offsetInfo(address) - offsetInfo2 := dis.mapper.offsetInfo(address + 1) + offsetInfo1 := dis.OffsetInfo(address) + offsetInfo2 := dis.OffsetInfo(address + 1) // if the potential jump table entry is already marked as code, the table end is reached - if offsetInfo1.Offset.Type == program.CodeOffset || offsetInfo2.Offset.Type == program.CodeOffset { + if offsetInfo1.Type() == program.CodeOffset || offsetInfo2.Type() == program.CodeOffset { jumpEngine.terminated = true return false, nil } if jumpEngine.entries == 0 { - offsetInfo1.Offset.SetType(program.JumpTable) + offsetInfo1.SetType(program.JumpTable) } - offsetInfo1.Offset.SetType(program.FunctionReference) - offsetInfo2.Offset.SetType(program.FunctionReference) + offsetInfo1.SetType(program.FunctionReference) + offsetInfo2.SetType(program.FunctionReference) b1, err := dis.ReadMemory(address) if err != nil { - return false, err + return false, fmt.Errorf("reading memory: %w", err) } b2, err := dis.ReadMemory(address + 1) if err != nil { - return false, err + return false, fmt.Errorf("reading memory: %w", err) } offsetInfo1.SetData([]byte{b1, b2}) @@ -209,19 +234,21 @@ func (dis *Disasm) processJumpEngineEntry(address uint16, jumpEngine *jumpEngine return true, nil } -// scanForNewJumpEngineEntry scans all jump engine calls for an unprocessed entry in the function address table that +// ScanForNewJumpEngineEntry scans all jump engine calls for an unprocessed entry in the function address table that // follows the call. It returns whether a new address to parse was added. -func (dis *Disasm) scanForNewJumpEngineEntry() (bool, error) { - for len(dis.jumpEngineCallers) != 0 { +func (j *JumpEngine) ScanForNewJumpEngineEntry(dis arch.Disasm) (bool, error) { + logger := dis.Logger() + + for len(j.jumpEngineCallers) != 0 { minEntries := -1 // find the jump engine table with the smallest number of processed entries, // this conservative approach avoids interpreting code in the table area as function references - for i := 0; i < len(dis.jumpEngineCallers); i++ { - engineCaller := dis.jumpEngineCallers[i] + for i := 0; i < len(j.jumpEngineCallers); i++ { + engineCaller := j.jumpEngineCallers[i] if engineCaller.terminated { // jump engine table is processed, remove it from list to process - dis.jumpEngineCallers = append(dis.jumpEngineCallers[:i], dis.jumpEngineCallers[i+1:]...) + j.jumpEngineCallers = append(j.jumpEngineCallers[:i], j.jumpEngineCallers[i+1:]...) } if i := engineCaller.entries; !engineCaller.terminated && (i < minEntries || minEntries == -1) { @@ -232,28 +259,28 @@ func (dis *Disasm) scanForNewJumpEngineEntry() (bool, error) { return false, nil } - for i := 0; i < len(dis.jumpEngineCallers); i++ { - engineCaller := dis.jumpEngineCallers[i] + for i := 0; i < len(j.jumpEngineCallers); i++ { + engineCaller := j.jumpEngineCallers[i] if engineCaller.entries != minEntries { continue } // calculate next address in table to process address := engineCaller.tableStartAddress + uint16(2*engineCaller.entries) - isEntry, err := dis.processJumpEngineEntry(address, engineCaller) + isEntry, err := j.processJumpEngineEntry(dis, address, engineCaller) if err != nil { return false, err } if isEntry { return true, nil } - dis.logger.Debug("Jump engine table", + logger.Debug("Jump engine table", log.String("address", fmt.Sprintf("0x%04X", engineCaller.tableStartAddress)), log.Int("entries", engineCaller.entries), ) // jump engine table is processed, remove it from list to process - dis.jumpEngineCallers = append(dis.jumpEngineCallers[:i], dis.jumpEngineCallers[i+1:]...) + j.jumpEngineCallers = append(j.jumpEngineCallers[:i], j.jumpEngineCallers[i+1:]...) i-- } } diff --git a/internal/offset.go b/internal/offset.go index 0673836..29f581c 100644 --- a/internal/offset.go +++ b/internal/offset.go @@ -69,3 +69,19 @@ func (o *offset) Context() uint16 { func (o *offset) IsNil() bool { return o == nil } + +func (o *offset) Type() program.OffsetType { + return o.Offset.Type +} + +func (o *offset) SetLabelComment(s string) { + o.Offset.LabelComment = s +} + +func (o *offset) BranchFrom() []uint16 { + branches := make([]uint16, 0, len(o.branchFrom)) + for _, ref := range o.branchFrom { + branches = append(branches, ref.address) + } + return branches +} diff --git a/internal/parser.go b/internal/parser.go index e4fa6bd..e96f181 100644 --- a/internal/parser.go +++ b/internal/parser.go @@ -90,9 +90,9 @@ func (dis *Disasm) addressToDisassemble() (uint16, error) { return address, nil } - isEntry, err := dis.scanForNewJumpEngineEntry() + isEntry, err := dis.jumpEngine.ScanForNewJumpEngineEntry(dis) if err != nil { - return 0, err + return 0, fmt.Errorf("scanning for new jump engine entry: %w", err) } if !isEntry { return 0, nil @@ -147,3 +147,8 @@ func (dis *Disasm) AddAddressToParse(address, context, from uint16, dis.offsetsToParse = append(dis.offsetsToParse, address) } } + +// DeleteFunctionReturnToParse deletes a function return address from the list of addresses to parse. +func (dis *Disasm) DeleteFunctionReturnToParse(address uint16) { + delete(dis.functionReturnsToParseAdded, address) +}