added 16-bit version of mandelbrot for minimal 64

michaelkamprath · Aug 7, 2023 · 295ff46 · 295ff46
1 parent 53d5442
commit 295ff46
Show file tree

Hide file tree

Showing 6 changed files with 713 additions and 48 deletions.
diff --git a/examples/slu4-minimal-64/README.md b/examples/slu4-minimal-64/README.md
@@ -42,8 +42,13 @@ The following instruction macros have been added in the ISA configuration file f
 | `phs4s` | stack offset | - | Push onto stack 4 byte value currently found at indicated stack offset |
 | `pls2` | - | - | Pull 2 bytes from stack. Last byte pulled will be in A register. |
 | `pls4` | - | - | Pull 4 bytes from stack. Last byte pulled will be in A register. |
+| `cpyaa` | absolute address | absolute address | Copies a single byte value from one absolute address (second operand) to another (first operand). |
 | `cpy2as` | absolute address | stack offset | Copy 2 bytes of data sourced from indicated stack offset to memory starting at indicated absolute address. Convert from stack big endian ordering to RAM little endian ordering. |
 | `cpy2sa` | stack offset | absolute address | Copy 2 bytes of data sourced from absolute address to stack at indicated offset. Convert from RAM little endian to stack big endian ordering. |
+| `cpy2ai` | absolute address | immediate | Copy 2 bytes of immediate value to memory starting at indicated absolute address. Preserves endian ordering. |
+| `cpy2si` | stack offset | immediate | Copy 2 bytes of immediate value to stack at indicated offset. Convert from RAM little endian to stack big endian ordering. |
+| `cpy2ss` | stack offset | stack offset | Copy 2 bytes of data from stack starting at indicated offset (second operand) to another location in stack starting at indicated offset (first operand). Byte ordering is preserved. |
+| `cpy2aa` | absolute address | absolute address | Copy 2 bytes starting at source address (second operand) to destination address (first operand) |
 | `cpy4as` | absolute address | stack offset | Copy 4 bytes of data sourced from indicated stack offset to memory starting at indicated absolute address. Convert from stack big endian ordering to RAM little endian ordering. |
 | `cpy4sa` | stack offset | absolute address | Copy 4 bytes of data sourced from absolute address to stack at indicated offset. Convert from RAM little endian to stack big endian ordering. |
 | `cpy4ai` | absolute address | immediate | Copy 4 bytes of immediate value to memory starting at indicated absolute address. Preserves endian ordering. |
@@ -52,6 +57,11 @@ The following instruction macros have been added in the ISA configuration file f
 | `cpy4aa` | absolute address | absolute address | Copy 4 bytes starting at source address (second operand) to destination address (first operand) |
 | `inc16a` | absolute address | - | Increment the two byte integer value found at the absolute address |
 | `inc32a` | absolute address | - | Increment the four byte integer value found at the absolute address |
+| `twos2s` | stack offset | - | Calculates the two's complement of a 2 byte value at a given offset into the stack and updates it in place. |
+| `twos2a` | absolute address | - | Calculates the two's complement of a 2 byte value at indicated absolute address and updates it in place. |
+| `twos4s` | stack offset | - | Calculates the two's complement of a 4 byte value at a given offset into the stack and updates it in place. |
+| `twos4a` | absolute address | - | Calculates the two's complement of a 4 byte value at indicated absolute address and updates it in place. |
+
 
 The operand descriptions use the definitions provided by documentation for Minimal 64. You should assume the accumulator (register `A`) is not preserved across any of these macros.
 

diff --git a/examples/slu4-minimal-64/slu4-minimal-64.yaml b/examples/slu4-minimal-64/slu4-minimal-64.yaml
@@ -1382,6 +1382,31 @@ macros:
         - "sta @ARG(0)+0"
         - "ldi BYTE1(@ARG(1))"
         - "sta @ARG(0)+1"
+  cpy2si:  # store a 2-byte immediate value into the stack at a given stack offset
+    - operands:
+        count: 2
+        operand_sets:
+          list:
+            - immediate_8bit  # ARG(0): stack offset of the destination
+            - immediate_16bit  # ARG(1): the 2-byte immediate value to store
+      instructions:
+        # stack is big endian: BYTE0 of the immediate is stored at offset+1 and BYTE1 at offset+0
+        - "ldi BYTE0(@ARG(1))"
+        - "sts @ARG(0)+1"
+        - "ldi BYTE1(@ARG(1))"  # A is left holding BYTE1 when the macro ends
+        - "sts @ARG(0)+0"
+  cpy2ss:  # copy 2 bytes from one stack offset to another; byte order is kept as-is
+    - operands:
+        count: 2
+        operand_sets:
+          list:
+            - immediate_8bit  # ARG(0): destination stack offset
+            - immediate_8bit  # ARG(1): source stack offset
+      instructions:  # NOTE(review): offset+0 is copied before offset+1, so source/destination ranges that overlap by one byte may not copy correctly - confirm callers never overlap
+        - "lds @ARG(1)+0"
+        - "sts @ARG(0)+0"
+        - "lds @ARG(1)+1"  # A is left holding the byte from source offset+1
+        - "sts @ARG(0)+1"
   cpy2aa:
     - operands:
         count: 2
@@ -1454,14 +1479,15 @@ macros:
             - immediate_8bit
             - immediate_32bit
       instructions:
+        # stack is big endian
         - "ldi BYTE0(@ARG(1))"
-        - "sts @ARG(0)+0"
+        - "sts @ARG(0)+3"
         - "ldi BYTE1(@ARG(1))"
-        - "sts @ARG(0)+1"
-        - "ldi BYTE2(@ARG(1))"
         - "sts @ARG(0)+2"
+        - "ldi BYTE2(@ARG(1))"
+        - "sts @ARG(0)+1"
         - "ldi BYTE3(@ARG(1))"
-        - "sts @ARG(0)+3"
+        - "sts @ARG(0)+0"
   cpy4ss:
     - operands:
         count: 2
@@ -1519,3 +1545,75 @@ macros:
         - "acb @ARG(0)+2"
         - "ldi 0"
         - "acb @ARG(0)+3"
+
+  twos2s:
+    # Negates (two's complement) the 2 byte value at the given stack offset, in place: each byte is inverted and 1 is added starting at offset+1.
+    - operands:
+        count: 1
+        operand_sets:
+          list:
+            - immediate_8bit  # ARG(0): stack offset of the 2 byte value
+      instructions:
+          - "lds @ARG(0)+1"  # byte at offset+1 is processed first (stack values are stored big endian)
+          - "not"
+          - "inc"  # the +1 of the two's complement
+          - "sts @ARG(0)+1"
+          - "lds @ARG(0)+0"
+          - "not"
+          - "aci 0"  # presumably adds the carry left by the earlier inc; assumes sts/lds/not leave the carry flag untouched - TODO confirm
+          - "sts @ARG(0)+0"
+  twos2a:
+    # Negates (two's complement) the 2 byte value at the given absolute address, in place: each byte is inverted and 1 is added starting at address+0.
+    - operands:
+        count: 1
+        operand_sets:
+          list:
+            - absolute_address  # ARG(0): address of the 2 byte value
+      instructions:
+          - "nob @ARG(0)+0"
+          - "inb @ARG(0)+0"  # the +1 of the two's complement
+          - "nob @ARG(0)+1"
+          - "ldi 0"  # zero A so the following acb only contributes the carry, matching twos4a and inc32a
+          - "acb @ARG(0)+1"
+  twos4s:
+    # Negates (two's complement) the 4 byte value at the given stack offset, in place: each byte is inverted and 1 is added starting at offset+3.
+    - operands:
+        count: 1
+        operand_sets:
+          list:
+            - immediate_8bit  # ARG(0): stack offset of the 4 byte value
+      instructions:
+          - "lds @ARG(0)+3"  # byte at offset+3 is processed first (stack values are stored big endian)
+          - "not"
+          - "inc"  # the +1 of the two's complement
+          - "sts @ARG(0)+3"
+          - "lds @ARG(0)+2"
+          - "not"
+          - "aci 0"  # presumably adds the carry from the previous byte; assumes sts/lds/not leave the carry flag untouched - TODO confirm
+          - "sts @ARG(0)+2"
+          - "lds @ARG(0)+1"
+          - "not"
+          - "aci 0"
+          - "sts @ARG(0)+1"
+          - "lds @ARG(0)+0"
+          - "not"
+          - "aci 0"
+          - "sts @ARG(0)+0"
+  twos4a:
+    # Negates (two's complement) the 4 byte value at the given absolute address, in place: each byte is inverted and 1 is added starting at address+0.
+    - operands:
+        count: 1
+        operand_sets:
+          list:
+            - absolute_address  # ARG(0): address of the 4 byte value
+      instructions:
+          - "nob @ARG(0)+0"
+          - "inb @ARG(0)+0"  # the +1 of the two's complement
+          - "nob @ARG(0)+1"
+          - "ldi 0"  # A is zeroed before every acb; presumably acb adds A plus carry into the addressed byte - TODO confirm
+          - "acb @ARG(0)+1"
+          - "nob @ARG(0)+2"
+          - "ldi 0"
+          - "acb @ARG(0)+2"
+          - "nob @ARG(0)+3"
+          - "ldi 0"
+          - "acb @ARG(0)+3"
diff --git a/examples/slu4-minimal-64/software/mandelbrot16.min64 b/examples/slu4-minimal-64/software/mandelbrot16.min64
@@ -0,0 +1,183 @@
+; Mandelbrot for the Minimal 64 Home Computer
+;
+; Approach is to use fixed point math so that only integer operations are needed. A detailed
+; explanation of this approach can be found here:
+;
+;       https://github.com/rahra/intfract
+;
+#require "slu4-min64-asm >= 1.2.0"
+
+IMAGE_X_PIXELS = 400                    ; loop bound for cur_pixel_x
+IMAGE_Y_PIXELS = 240                    ; loop bound for cur_pixel_y
+
+SCALE_BITS = 9                          ; shift count used to rescale products of two fixed point values
+SCALE_FACTOR = (1 << SCALE_BITS)        ; fixed point representation of 1.0
+
+MANDELBROT_START_X = -2*SCALE_FACTOR    ; scaled x0 for pixel column 0
+MANDELBROT_END_X = 1*SCALE_FACTOR
+MANDELBROT_STEP_X = (MANDELBROT_END_X - MANDELBROT_START_X)/IMAGE_X_PIXELS    ; NOTE(review): 1536/400 is not a whole number; how the assembler rounds it is not visible here - confirm
+
+MANDELBROT_START_Y = -1*SCALE_FACTOR    ; scaled y0 for pixel row 0
+MANDELBROT_END_Y = 1*SCALE_FACTOR
+MANDELBROT_STEP_Y = (MANDELBROT_END_Y - MANDELBROT_START_Y)/IMAGE_Y_PIXELS    ; NOTE(review): 1024/240 is not a whole number; how the assembler rounds it is not visible here - confirm
+
+MAX_ITERATIONS = $FF                    ; iteration_count value at which a point is treated as inside the set
+
+
+.org $8000
+init:                   ; program entry: set up the stack, clear the display, start at pixel (0,0)
+    spinit              ; init stack
+    jps _Clear          ; _Clear is not defined in this file; presumably a system routine that clears the screen - TODO confirm
+    cpy2ai cur_pixel_x,0    ; cur_pixel_x = 0
+    cpy2ai cur_pixel_y,0    ; cur_pixel_y = 0
+
+.pixel_loop_y:
+    ; start of a pixel row: scaled_y0 = cur_pixel_y * MANDELBROT_STEP_Y + MANDELBROT_START_Y
+    phs2a cur_pixel_y
+    phs2i MANDELBROT_STEP_Y
+    jps multiply_int16          ; results are 32 bit (per the original note; multiply_int16 itself is not visible here)
+    pls2                        ; pull 2 of the 4 bytes, leaving a 16 bit value on the stack (presumably the low half of the product - TODO confirm)
+    phs2i MANDELBROT_START_Y
+    jps add16
+    cpy2as scaled_y0,1          ; fetch the sum from stack offset 1 into scaled_y0
+    pls2 pls2                   ; pull the remaining 4 bytes so the stack is balanced again
+
+.pixel_loop_x:
+    ; start of a pixel: scaled_x0 = cur_pixel_x * MANDELBROT_STEP_X + MANDELBROT_START_X
+    phs2a cur_pixel_x
+    phs2i MANDELBROT_STEP_X
+    jps multiply_int16          ; results are 32 bit (per the original note; multiply_int16 itself is not visible here)
+    pls2                        ; pull 2 of the 4 bytes, leaving a 16 bit value on the stack (presumably the low half of the product - TODO confirm)
+    phs2i MANDELBROT_START_X
+    jps add16
+    cpy2as scaled_x0,1          ; fetch the sum from stack offset 1 into scaled_x0
+    pls2 pls2                   ; pull the remaining 4 bytes so the stack is balanced again
+
+    ; check if in mandelbrot set
+.init_mandelbrot:
+    ; start iterations with a zeroed counter
+    ldi 0 sta iteration_count
+    ; initialize zx and zy to the scaled coordinates of this pixel
+    cpy2aa zx,scaled_x0
+    cpy2aa zy,scaled_y0
+
+    ; push pixel coordinates on stack (both bytes of x, first byte of y) and set the pixel up front
+    phsa cur_pixel_x+0
+    phsa cur_pixel_x+1
+    phsa cur_pixel_y+0
+    jps _SetPixel               ; _SetPixel is not defined in this file; presumably a system routine - TODO confirm
+    pls pls pls                 ; remove pixel coordinates from stack
+
+.mandelbrot_loop:
+    ; find zx*zx + zy*zy
+    phs2a zx
+    phs2a zx
+    jps multiply_int16
+    phsi SCALE_BITS jps asr32n pls  ; rescale: push shift count, call asr32n, pull the count again
+    pls2                            ; pull 2 of the 4 product bytes
+    cpy2as zx_squared,1             ; zx_squared = remaining 16 bit value
+    pls2                            ; stack is balanced again
+
+    phs2a zy
+    phs2a zy
+    jps multiply_int16
+    phsi SCALE_BITS jps asr32n pls  ; rescale: push shift count, call asr32n, pull the count again
+    pls2                            ; pull 2 of the 4 product bytes
+    cpy2as zy_squared,1             ; zy_squared = remaining 16 bit value, which stays on the stack as an add16 operand
+    phs2a zx_squared
+    jps add16
+    cpy2as temp_int16,1             ; temp_int16 = zx_squared + zy_squared
+    pls2 pls2                       ; stack is balanced again
+
+    ; check if value is greater than the escape threshold 4*SCALE_FACTOR
+    phs2a temp_int16        ; left value
+    phs2i 4*SCALE_FACTOR    ; right value
+    jps compare_uint16
+    pls2 pls2               ; assumes pls2 leaves the flags set by compare_uint16 intact - TODO confirm
+    bgt .not_in_mandelbrot
+
+    ; increment counter and check count
+    inb iteration_count
+    lda iteration_count cpi MAX_ITERATIONS
+    beq .in_mandelbrot          ; if we are at max iterations, point is in set
+
+    ; set up for next mandelbrot iteration
+
+    ; zy = 2*zx*zy + scaled_y0
+    phs2a zx
+    phs2a zy
+    jps multiply_int16
+    phsi (SCALE_BITS-1) jps asr32n pls  ; rescale with one bit less of shift, which folds in the factor of 2
+    pls2                                ; pull 2 of the 4 product bytes
+    ; stack now contains 2*zx*zy
+    phs2a scaled_y0
+    jps add16
+    cpy2as zy,1                         ; the new zy value; zx is still the old value at this point
+    pls2 pls2                           ; stack is balanced again
+
+    ; zx = zx*zx - zy*zy + scaled_x0, built from the squares saved earlier in this iteration
+    phs2a zy_squared        ; Y value
+    phs2a zx_squared        ; X value
+    jps subtract16          ; X-Y
+    phs2a scaled_x0
+    jps add16
+    cpy2as zx,1             ; the new zx value
+    pls2 pls2 pls2          ; three 2-byte values were pushed above; pull all 6 bytes
+
+    ; next loop
+    jpa .mandelbrot_loop
+
+.in_mandelbrot:
+    ; reached when iteration_count hits MAX_ITERATIONS: push pixel coordinates on stack and clear the pixel
+    phsa cur_pixel_x+0
+    phsa cur_pixel_x+1
+    phsa cur_pixel_y+0
+    jps _ClrPixel               ; _ClrPixel is not defined in this file; presumably a system routine - TODO confirm
+    pls pls pls                 ; remove pixel coordinates from stack
+.not_in_mandelbrot:             ; escaped points land here, keeping the pixel that was set before iterating
+.mandelbot_pixel_done:          ; not referenced by any branch in the visible code
+
+.pixel_loop_x_end:
+    ; next x pixel
+    inc16a cur_pixel_x
+    ; check to see if we are done with current x row
+    phs2i IMAGE_X_PIXELS
+    phs2a cur_pixel_x
+    jps compare_uint16
+    pls2 pls2                   ; assumes pls2 leaves the flags set by compare_uint16 intact - TODO confirm
+    bne .pixel_loop_x           ; keep going until cur_pixel_x equals IMAGE_X_PIXELS
+    cpy2ai cur_pixel_x,0        ; row finished: restart x at 0 for the next row
+
+.pixel_loop_y_end:
+    ; next y pixel
+    inc16a cur_pixel_y
+    ; check to see if we are done overall
+    phs2i IMAGE_Y_PIXELS
+    phs2a cur_pixel_y
+    jps compare_uint16
+    pls2 pls2                   ; assumes pls2 leaves the flags set by compare_uint16 intact - TODO confirm
+    bne .pixel_loop_y           ; keep going until cur_pixel_y equals IMAGE_Y_PIXELS
+
+.looping_done:                  ; all pixels processed: write _XPos/_YPos, scroll, and hand control to _Prompt
+    ldi 0 sta _XPos ldi 29 sta _YPos    ; _XPos = 0, _YPos = 29; presumably the text cursor position - TODO confirm
+    jps _ScrollUp               ; not defined in this file; presumably a system routine - TODO confirm
+    jpa _Prompt                 ; jump (not a subroutine call), so execution does not come back here
+
+;
+; Variables
+;
+
+cur_pixel_x:        .2byte 0    ; current pixel column, counts up to IMAGE_X_PIXELS
+cur_pixel_y:        .2byte 0    ; current pixel row, counts up to IMAGE_Y_PIXELS
+scaled_x0:          .2byte 0    ; cur_pixel_x * MANDELBROT_STEP_X + MANDELBROT_START_X
+scaled_y0:          .2byte 0    ; cur_pixel_y * MANDELBROT_STEP_Y + MANDELBROT_START_Y
+zx:                 .2byte 0    ; iterated x value, starts at scaled_x0
+zy:                 .2byte 0    ; iterated y value, starts at scaled_y0
+zx_squared:         .2byte 0    ; rescaled zx*zx from the current iteration
+zy_squared:         .2byte 0    ; rescaled zy*zy from the current iteration
+temp_int16:         .2byte 0    ; zx_squared + zy_squared, compared against 4*SCALE_FACTOR
+iteration_count:    .byte 0     ; iterations done for the current pixel, compared against MAX_ITERATIONS
+
+#include "math16lib.min64"
+#include "math32lib.min64"
+#include "stringlib.min64"
diff --git a/...slu4-minimal-64/software/mandelbrot.min64 → ...u4-minimal-64/software/mandelbrot32.min64 b/...slu4-minimal-64/software/mandelbrot.min64 → ...u4-minimal-64/software/mandelbrot32.min64