From 9acca52e443986b43cb20dcdbafbd8a3bfefbb1f Mon Sep 17 00:00:00 2001
From: Virgil Dupras
Date: Tue, 16 Apr 2019 16:49:47 -0400
Subject: [PATCH] zasm: begin parsing with easy pickings

Single opcodes that yield constants. "echo ret | zasm" yields 0xc9.
---
 apps/zasm/emul/Makefile |   2 +-
 apps/zasm/emul/glue.asm |   6 +-
 apps/zasm/emul/user.inc |   3 +
 apps/zasm/zasm.asm      | 215 ++++++++++++++++++++++++++++++++++++++++++++++--
 4 files changed, 216 insertions(+), 10 deletions(-)
 create mode 100644 apps/zasm/emul/user.inc

diff --git a/apps/zasm/emul/Makefile b/apps/zasm/emul/Makefile
index 34b0740..e30ad47 100644
--- a/apps/zasm/emul/Makefile
+++ b/apps/zasm/emul/Makefile
@@ -8,4 +8,4 @@ kernel.h: glue.asm
 	scas -o - $< | ./bin2c.sh KERNEL > $@
 
 zasm.h: ../zasm.asm
-	scas -o - $< | ./bin2c.sh ZASM > $@
+	scas -o - -I ./emul $< | ./bin2c.sh ZASM > $@
diff --git a/apps/zasm/emul/glue.asm b/apps/zasm/emul/glue.asm
index 24966d6..ddfc3b1 100644
--- a/apps/zasm/emul/glue.asm
+++ b/apps/zasm/emul/glue.asm
@@ -1,7 +1,5 @@
+#include "user.inc"
 ; Glue code for the emulated environment
-RAMSTART .equ 0x8000
-RAMEND .equ 0xffff
-ZASM_CODE .equ RAMSTART
 ZASM_INPUT .equ 0xa000
 ZASM_OUTPUT .equ 0xd000
 
@@ -12,7 +10,7 @@ init:
 	ld sp, hl
 	ld hl, ZASM_INPUT
 	ld de, ZASM_OUTPUT
-	call ZASM_CODE
+	call USER_CODE
 	; signal the emulator we're done
 	; BC contains the number of written bytes
 	ld a, b
diff --git a/apps/zasm/emul/user.inc b/apps/zasm/emul/user.inc
new file mode 100644
index 0000000..af7a2dc
--- /dev/null
+++ b/apps/zasm/emul/user.inc
@@ -0,0 +1,3 @@
+RAMSTART .equ 0x8000
+RAMEND .equ 0xffff
+USER_CODE .equ RAMSTART
diff --git a/apps/zasm/zasm.asm b/apps/zasm/zasm.asm
index 450a6f7..437adcf 100644
--- a/apps/zasm/zasm.asm
+++ b/apps/zasm/zasm.asm
@@ -1,6 +1,211 @@
-; dummy code, to test emulator
-ld a, (hl)
-inc a
-ld (de), a
-ld bc, 1
+#include "user.inc"
+.org USER_CODE
+; Entry point, called by the glue code with HL pointing to the source text and
+; DE pointing to the output buffer. Returns the number of written bytes in BC.
+call parseLine
+ld b, 0
+ld c, a		; written bytes
 ret
+
+; CORE COPY PASTE - TODO: call in kernel
+; Compares strings pointed to by HL and DE up to A count of characters. If
+; equal, Z is set. If not equal, Z is reset.
+; Comparison also stops, with Z set, at the first null char common to both
+; strings. BC, DE and HL are preserved; A is destroyed.
+strncmp:
+	push bc
+	push hl
+	push de
+
+	ld b, a
+.loop:
+	ld a, (de)
+	cp (hl)
+	jr nz, .end	; not equal? break early. NZ is carried out
+			; to the caller
+	cp 0		; If our chars are null, stop the cmp
+	jr z, .end	; The positive result will be carried to the
+			; caller
+	inc hl
+	inc de
+	djnz .loop
+	; We went through all chars with success, but our current Z flag is
+	; unset because of the cp 0. Let's do a dummy CP to set the Z flag.
+	cp a
+
+.end:
+	pop de
+	pop hl
+	pop bc
+	; Because we don't call anything else than CP that modify the Z flag,
+	; our Z value will be that of the last cp (reset if we broke the loop
+	; early, set otherwise)
+	ret
+
+; add the value of A into DE
+; A ends up holding the new value of E; flags are modified.
+addDE:
+	add a, e
+	jr nc, .end	; no carry? skip inc
+	inc d
+.end:
+	ld e, a
+	ret
+
+; Transforms the character in A, if it's in the a-z range, into its upcase
+; version.
+upcase:
+	cp 'a'
+	ret c		; A < 'a'. nothing to do
+	cp 'z'+1
+	ret nc		; A >= 'z'+1. nothing to do
+	; 'a' - 'A' == 0x20
+	sub 0x20
+	ret
+
+; ZASM code
+; Sets Z if A is ' ', CR, LF, or null. A is preserved.
+isSep:
+	cp ' '
+	ret z
+	cp 0
+	ret z
+	cp 0x0d
+	ret z
+	cp 0x0a
+	ret
+
+; Read the word at (HL) and put it in curWord, upcased and null terminated.
+; Reading stops at the first separator (see isSep) or after 4 chars, whichever
+; comes first. A is the read length. BC, DE and HL are preserved.
+readWord:
+	push bc
+	push de
+	push hl
+	ld de, curWord
+	ld b, 4
+.loop:
+	ld a, (hl)
+	call isSep
+	jr z, .end
+	call upcase
+	ld (de), a
+	inc hl
+	inc de
+	djnz .loop
+.end:
+	; B is the room left out of 4 chars, so the read length is 4 - B
+	xor a
+	ld (de), a	; null terminate
+	ld a, 4
+	sub a, b
+	pop hl
+	pop de
+	pop bc
+	ret
+
+; Compare primary row at (DE) with string at curWord. Sets Z flag if there's a
+; match, reset if not. Only the 4 bytes of the row's name are compared. A is
+; destroyed; DE and HL are preserved.
+matchPrimaryRow:
+	push hl
+	ld hl, curWord
+	ld a, 4
+	call strncmp
+	pop hl
+	ret
+
+; Parse line at (HL) and write resulting opcode(s) in (DE). Returns the number
+; of bytes written in A: 1 when the first word of the line matches a name in
+; instTBlPrimary, 0 (nothing written) otherwise. Arguments are not parsed yet:
+; the first row with a matching name wins. DE and HL are preserved.
+parseLine:
+	call readWord
+	push de
+	ld de, instTBlPrimary
+.loop:
+	ld a, (de)
+	cp 0
+	jr z, .nomatch	; we reached last entry
+	call matchPrimaryRow
+	jr z, .match
+	ld a, 7		; size of a row
+	call addDE
+	jr .loop
+
+.nomatch:
+	xor a
+	pop de
+	ret
+.match:
+	ld a, 6		; opcode is on 7th byte
+	call addDE
+	ld a, (de)
+	pop de
+	ld (de), a
+	ld a, 1
+	ret
+
+; This is a list of primary instructions (single opcode) that lead to a
+; constant (no group code to insert).
+; That doesn't mean that they don't take any argument though. For example,
+; "DEC IX" leads to a special opcode. These kinds of constants are indicated
+; as a single byte to save space. Meaning:
+;
+; All single char registers (A/B/C etc) -> themselves
+; HL -> h
+; (HL) -> l
+; DE -> d
+; (DE) -> e
+; BC -> b
+; (BC) -> c
+; IX -> X
+; (IX) -> x
+; IY -> Y
+; (IY) -> y
+; AF -> a
+; AF' -> f
+; SP -> s
+; (SP) -> p
+; None -> 0
+;
+; This is a sorted list of "primary" (single byte) instructions along with
+; metadata
+; 4 bytes for the name (fill with zero)
+; 1 byte for arg constant
+; 1 byte for 2nd arg constant
+; 1 byte for opcode
+; The list ends with a single zero byte.
+instTBlPrimary:
+	.db "ADD", 0, 'A', 'l', 0x86		; ADD A, (HL)
+	.db "CCF", 0, 0, 0, 0x3f		; CCF
+	.db "CPL", 0, 0, 0, 0x2f		; CPL
+	.db "DAA", 0, 0, 0, 0x27		; DAA
+	.db "DI",0,0, 0, 0, 0xf3		; DI
+	.db "EI",0,0, 0, 0, 0xfb		; EI
+	.db "EX",0,0, 'p', 'h', 0xe3		; EX (SP), HL
+	.db "EX",0,0, 'a', 'f', 0x08		; EX AF, AF'
+	.db "EX",0,0, 'd', 'h', 0xeb		; EX DE, HL
+	.db "EXX", 0, 0, 0, 0xd9		; EXX
+	.db "HALT", 0, 0, 0x76			; HALT
+	.db "INC", 0, 'l', 0, 0x34		; INC (HL)
+	.db "JP",0,0, 'l', 0, 0xe9		; JP (HL)
+	.db "LD",0,0, 'c', 'A', 0x02		; LD (BC), A
+	.db "LD",0,0, 'e', 'A', 0x12		; LD (DE), A
+	.db "LD",0,0, 'A', 'c', 0x0a		; LD A, (BC)
+	.db "LD",0,0, 'A', 'e', 0x1a		; LD A, (DE)
+	.db "LD",0,0, 's', 'h', 0xf9		; LD SP, HL
+	.db "NOP", 0, 0, 0, 0x00		; NOP
+	.db "RET", 0, 0, 0, 0xc9		; RET
+	.db "RLA", 0, 0, 0, 0x17		; RLA
+	.db "RLCA", 0, 0, 0x07			; RLCA
+	.db "RRA", 0, 0, 0, 0x1f		; RRA
+	.db "RRCA", 0, 0, 0x0f			; RRCA
+	.db "SCF", 0, 0, 0, 0x37		; SCF
+	.db 0
+
+; *** Variables ***
+; enough space for 4 chars and a null
+curWord:
+	.db 0, 0, 0, 0, 0
+