-
Notifications
You must be signed in to change notification settings - Fork 6
/
mult63.a
172 lines (148 loc) · 3.53 KB
/
mult63.a
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
; mult63.a
; based on Dr Jefyll, http://forum.6502.org/viewtopic.php?f=9&t=689&start=0#p19958
; - adjusted to use fixed zero page addresses
; - removed 'decrement to avoid clc' as this is slower on average
; - rearranged memory use to remove final memory copy and give LSB first order to result
; - removed temp zp storage bytes
; - unrolled the outer loop
; - unrolled the two inner loops (four copies of the shift/add step, run twice per byte)
;
; 16 bit x 16 bit unsigned multiply, 32 bit result
; Average cycles: 422.00
; 165 bytes
; Zero page workspace.
; result starts at the same address as multiplier on purpose: the routine
; consumes the multiplier bit by bit and fills the same bytes with the product.
multiplicand = $02              ; 2 bytes, low byte first
multiplier   = $04              ; 2 bytes, low byte first
result       = multiplier       ; 4 bytes, low byte first (overlays multiplier)

* = $0200                       ; assembly origin
; mult: 16 bit x 16 bit unsigned multiply, 32 bit result
;
; On Entry:
;   (multiplier, multiplier+1):     two byte multiplier, low byte first
;   (multiplicand, multiplicand+1): two byte multiplicand, low byte first
; On Exit:
;   (result, result+1, result+2, result+3): product, low byte first.
;       The product overwrites the multiplier and the two bytes after it.
;   multiplicand is only read, never written.
;   A = top byte of the product (the value stored to result+3), Y = 0.
;   X is used as scratch whenever a set multiplier bit is processed.
;
; Method: shift-and-add. A:result+2 hold the upper 16 bits of the running
; product. Each step adds the multiplicand to those 16 bits if the current
; multiplier bit is set, then shifts the whole product one bit to the right.
; The bit leaving result+2 is shifted into the multiplier byte currently
; being consumed, so product bits replace multiplier bits as they are used:
;   first 8 steps:  A -> result+2 -> result
;   final 8 steps:  A -> result+2 -> result+1

; One shift-and-add step.
;   In:  carry      = multiplier bit to process
;        A:result+2 = upper 16 bits of the running product
;   Out: product shifted right by one; the bit leaving result+2 enters the
;        top of .spill and the bit leaving the bottom of .spill is left in
;        carry, ready to be the input of the next step.
!macro mult_step .spill {
        bcc +                   ; bit clear: skip the add, carry (0) shifts into A
        tax                     ; park the high byte while A does the low byte add
        lda result+2
        clc
        adc multiplicand
        sta result+2
        txa                     ; high byte back; carry from the low add survives sta/txa
        adc multiplicand+1      ; carry out is bit 16, recovered by the ror below
+
        ror                     ; carry -> A -> result+2 -> .spill -> carry
        ror result+2
        ror .spill
}

mult
        lda #0
        sta result+2            ; upper 16 bits (A:result+2) start at zero

        ; --- multiplier low byte ---
        ldy #2                  ; 2 passes x 4 steps = 8 bits
        lsr result              ; bit 0 -> carry; the 0 entering bit 7 is what
                                ; falls out into carry after the eighth step
inner_loop
        +mult_step result
        +mult_step result
        +mult_step result
        +mult_step result
        dey
        bne inner_loop          ; second pass handles the remaining 4 bits

        ; --- multiplier high byte ---
        ldy #2                  ; 2 passes x 4 steps = 8 bits
        lsr result+1            ; bit 8 of the multiplier -> carry
inner_loop2
        +mult_step result+1
        +mult_step result+1
        +mult_step result+1
        +mult_step result+1
        dey
        bne inner_loop2         ; second pass handles the remaining 4 bits

        sta result+3            ; most significant byte of the product
        rts