Skip to content

Commit

Permalink
corrected ISA bug and improved speed
Browse files Browse the repository at this point in the history
  • Loading branch information
michaelkamprath committed Jul 15, 2023
1 parent 7fe201c commit 1416250
Show file tree
Hide file tree
Showing 3 changed files with 98 additions and 83 deletions.
53 changes: 49 additions & 4 deletions examples/slu4-minimal-64/slu4-minimal-64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1004,8 +1004,8 @@ instructions:
operand_sets:
list:
- absolute_address
ble:
# branch on less than or equal
bgt:
# branch on greater than
bytecode:
value: 0x61
size: 8
Expand All @@ -1014,8 +1014,8 @@ instructions:
operand_sets:
list:
- absolute_address
bgt:
# branch on greater than
ble:
# branch on less than or equal
bytecode:
value: 0x62
size: 8
Expand Down Expand Up @@ -1358,6 +1358,19 @@ macros:
- "sts @ARG(0)+1"
- "lda @ARG(1)+1"
- "sts @ARG(0)+0"
# cpy2ai <absolute_address>, <immediate_16bit>
# Macro taking two operands (an absolute address and a 16-bit immediate) that
# expands to two load-immediate/store pairs: BYTE0 of the immediate is stored
# at address+0 and BYTE1 of the immediate is stored at address+1.
# NOTE(review): presumably BYTE0 is the low byte (matching the "little endian"
# remark below) and the expansion overwrites the accumulator — confirm against
# the assembler and ISA documentation.
cpy2ai:
- operands:
count: 2
operand_sets:
list:
- absolute_address
- immediate_16bit
instructions:
# Copies in little endian order
- "ldi BYTE0(@ARG(1))"
- "sta @ARG(0)+0"
- "ldi BYTE1(@ARG(1))"
- "sta @ARG(0)+1"
cpy4as:
- operands:
count: 2
Expand Down Expand Up @@ -1441,3 +1454,35 @@ macros:
- "sts @ARG(0)+2"
- "lds @ARG(1)+3"
- "sts @ARG(0)+3"
# inc16a <absolute_address>
# Macro taking one absolute-address operand. It loads the byte at address+0,
# applies "inc", and stores it back, then loads the byte at address+1, applies
# "aci 0", and stores it back.
# NOTE(review): this looks like a 16-bit in-memory increment with the least
# significant byte at address+0, where "aci 0" folds in the carry produced by
# "inc". That relies on "sta"/"lda" leaving the carry flag untouched — confirm
# against the ISA documentation.
inc16a:
- operands:
count: 1
operand_sets:
list:
- absolute_address
instructions:
# byte at +0: increment
- "lda @ARG(0)+0"
- "inc"
- "sta @ARG(0)+0"
# byte at +1: add 0 with carry (presumably the carry out of the byte above)
- "lda @ARG(0)+1"
- "aci 0"
- "sta @ARG(0)+1"
# inc32a <absolute_address>
# Macro taking one absolute-address operand. It applies "inc" to the byte at
# address+0, then applies "aci 0" in turn to the bytes at address+1, +2 and +3,
# storing each byte back to where it was loaded from.
# NOTE(review): this looks like a 32-bit in-memory increment with the least
# significant byte at address+0, rippling the carry upward through "aci 0".
# That relies on "sta"/"lda" leaving the carry flag untouched — confirm
# against the ISA documentation.
inc32a:
- operands:
count: 1
operand_sets:
list:
- absolute_address
instructions:
# byte at +0: increment
- "lda @ARG(0)+0"
- "inc"
- "sta @ARG(0)+0"
# bytes at +1..+3: add 0 with carry (presumably propagating the carry upward)
- "lda @ARG(0)+1"
- "aci 0"
- "sta @ARG(0)+1"
- "lda @ARG(0)+2"
- "aci 0"
- "sta @ARG(0)+2"
- "lda @ARG(0)+3"
- "aci 0"
- "sta @ARG(0)+3"
88 changes: 30 additions & 58 deletions examples/slu4-minimal-64/software/math32lib.min64
Original file line number Diff line number Diff line change
Expand Up @@ -2,36 +2,26 @@

; compare32
; Compares two 32-bit values to determine equality
; X ? Y
;
; Arguments
; sp+3 : left X value (4 bytes)
; sp+7 : right Y value (4 bytes)
; sp+11 : results flags placeholder
; sp+3 : right Y value (4 bytes)
; sp+7 : left X value (4 bytes)
;
; Returns
; sp+11 : results flags:
; bit 0 - Zero Flag (values are equal)
; bit 1 - Carry Flag (X > Y)
; flags will be set per comparison
;
compare32:
; init results
ldi 0 sta .flags ; has to be RAM variable since stack ops alter flags
; first check high bytes, then others in sequence
; values on stack are stored big endian
lds (7+0) sta .rval lds (3+0) cpa .rval bne .done
lds (7+1) sta .rval lds (3+1) cpa .rval bne .done
lds (7+2) sta .rval lds (3+2) cpa .rval bne .done
lds (7+3) sta .rval lds (3+3) cpa .rval
; check zero flag. flags still OK because no stack operation since CPA
bne .done ldi %00000001 adb .flags ; note this clears the flags
lds (3+0) sta .rval lds (7+0) cpa .rval bne .done
lds (3+1) sta .rval lds (7+1) cpa .rval bne .done
lds (3+2) sta .rval lds (7+2) cpa .rval bne .done
lds (3+3) sta .rval lds (7+3) cpa .rval
.done:
; check carry flag
bcc .return ldi %00000010 adb .flags
.return:
lda .flags sts 11
rts
.rval: .byte 0
.flags: .byte 0




Expand Down Expand Up @@ -159,25 +149,23 @@ multiply_int32:
;
divide32:
; first check values for 0
phsi 0
phs4i 0
phs4s (7+5)
phs4s (7+4)
jps compare32
pls4
lds 5 cpi 1 beq .divide_by_zero
phs4s (3+5)
beq .divide_by_zero
phs4s (3+4)
jps compare32
pls4
pls4
lds 1 pls cpi 1 beq .return_zero
beq .return_zero
; check if divisor > dividend
phsi 0
phs4s (3+1)
phs4s (7+5)
phs4s (7+0)
phs4s (3+4)
jps compare32
pls4
pls4
lds 1 pls cpi 2 beq .divisor_too_large
bgt .divisor_too_large
.start_division:
; set up working stack:
; little endian
Expand All @@ -195,11 +183,10 @@ divide32:
sta .working_mem+0
ldi 0 sta .carry_bit
; determine if we can do subtraction
phsi 0 ; compare results
phs4s 7+1 ; divisor
phs4a .working_mem+4 ; working value high word
phs4s 7 ; divisor (left)
phs4a .working_mem+4 ; working value high word (right)
jps compare32
lds 9 cpi 0 beq .div_loop_continue
bgt .div_loop_continue
.div_loop_subtraction:
; working value is equal to or larger than divisor
; do the subtraction
Expand All @@ -211,7 +198,6 @@ divide32:
; clear stack
pls4
pls4
pls
; decrement counter and check for 0
deb .counter
lda .counter cpi 0 bne .div_loop
Expand Down Expand Up @@ -265,25 +251,18 @@ divide32:
; sp+7 - value Y, 4 byte value
;
; Return Value
; sp+2 - replace the original 4 byte value with the sum
; sp+3 - replace the original 4 byte value with the sum
;
add32:
; since interacting with the stack has an undefined impact
; on the carry flags, we need to copy the values to local
; variables first.
; stack is big endian, save locally little endian
cpy4as .xval, 3
cpy4as .yval, 7
; start addition with LSB
lda .xval+0 ada .yval+0 sta .xval+0
lda .xval+1 aca .yval+1 sta .xval+1
lda .xval+2 aca .yval+2 sta .xval+2
lda .xval+3 aca .yval+3 sta .xval+3
; save results back to stack
cpy4sa 3, .xval
; start addition with LSB, Remember, stack is big endian
lds 3+3 ada .yval+0 sts 3+3
lds 3+2 aca .yval+1 sts 3+2
lds 3+1 aca .yval+2 sts 3+1
lds 3+0 aca .yval+3 sts 3+0
; end return
rts
.xval: .4byte 0
.yval: .4byte 0


Expand All @@ -299,22 +278,15 @@ add32:
;
;
subtract32:
; since interacting with the stack has an undefined impact
; on the carry flags, we need to copy the values to local
; variables first.
; stack is big endian, save locally little endian
cpy4as .xval, 3
; stack is big endian, save Y locally little endian
cpy4as .yval, 7
; start subtraction with LSB
lda .xval+0 sba .yval+0 sta .xval+0
lda .xval+1 sca .yval+1 sta .xval+1
lda .xval+2 sca .yval+2 sta .xval+2
lda .xval+3 sca .yval+3 sta .xval+3
; save results back to stack
cpy4sa 3, .xval
lds 3+3 sba .yval+0 sts 3+3
lds 3+2 sca .yval+1 sts 3+2
lds 3+1 sca .yval+2 sts 3+1
lds 3+0 sca .yval+3 sts 3+0
; end return
rts
.xval: .4byte 0
.yval: .4byte 0


Expand Down
40 changes: 19 additions & 21 deletions examples/slu4-minimal-64/software/primes.min64
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,7 @@ start:
phs4a _n_value jps print_value32 pls4
phs2i is_prime_str jps _Print pls2
.increment_n:
phs4a _n_value
phs4i 1
jps add32
cpy4as _n_value,1
pls4
pls4
inc32a _n_value
jpa .n_loop

_n_value: .4byte 0
Expand Down Expand Up @@ -64,6 +59,11 @@ print_value32:
;
; returns
; sp+7 : 0 or 1 depending on whether N is prime

_isquared_str: .cstr "I^2 = "
_nprefix_str: .cstr ", N = "
_eol_str: .cstr "\n"

is_prime32:
; first check for 2 or 3
lds 3 cpi 0 bne .modulo_two ; check top byte of N for 0
Expand All @@ -76,19 +76,17 @@ is_prime32:
.modulo_two:
lds 6 lsr bcc .is_not_prime ; see if N's least significant bit is even or odd
.modulo_three:
phsi 0 ; results placeholder for compare later
phs4i 3 ; place divisor on stack
phs4s (3+5) ; place dividend on stack (from stack)
phs4s (3+4) ; place dividend on stack (from stack)
jps divide32
pls4
; check if remainder is 0
phs4i 0 jps compare32 pls4
pls4
pls cpi 1 beq .is_not_prime
beq .is_not_prime
.loop_init:
cpy4ai .current_i_val,5
.loop:
phsi 0 ; push placeholder on stack for later compare
phs4a .current_i_val
phs4a .current_i_val
jps multiply_uint32
Expand All @@ -98,40 +96,40 @@ is_prime32:
lds 3 cpi 0 bne .iteration_loop_done
lds 4 cpi 0 bne .iteration_loop_done
pls4
; now compare low 4 bytes of I*I result to N
phs4s (3+5) jps compare32 pls4
cpy4as .isquared,1
pls4
; if N < I*I, we are done
pls cpi 0 beq .loop_done_is_prime
; now compare low 4 bytes of I*I result to N
phs4a .isquared phs4s 3+4 jps compare32 pls4 pls4
; if I*I > N, we are done
bgt .loop_done_is_prime
.n_gte_i_squared:
; now check various modulos.
; check N % I == 0
phsi 0 ; push placeholder on stack for later compare
phs4a .current_i_val ; I
phs4s (3+5) ; N
phs4s (3+4) ; N
jps divide32
pls4 ; quotient
phs4i 0
jps compare32
pls4
pls4
pls cpi 1 beq .loop_done_is_not_prime
beq .loop_done_is_not_prime
; check N % (I+2) == 0
phs4a .current_i_val ; I
phs4i 2
jps add32
cpy4as .temp_val, 1
pls4
pls4
phsi 0
phs4a .temp_val
phs4s (3+5)
phs4s (3+4)
jps divide32
pls4 ; quotient
phs4i 0
jps compare32
pls4 ; zero
pls4 ; remainder
pls cpi 1 beq .loop_done_is_not_prime
beq .loop_done_is_not_prime
; add 6 to I and loop
phs4a .current_i_val
phs4i 6
Expand All @@ -144,7 +142,6 @@ is_prime32:
; get rid of I*I stack
pls4
pls4
pls
.loop_done_is_prime:
.is_prime:
ldi 1 sts 7
Expand All @@ -156,6 +153,7 @@ is_prime32:
.current_i_val: .4byte 0
.temp_val: .4byte 0

.isquared: .4byte 0

#include "math32lib.min64"
#include "stringlib.min64"

0 comments on commit 1416250

Please sign in to comment.