From 57c1a10434c93ecd99eeddd38466e10421a10211 Mon Sep 17 00:00:00 2001 From: Virgil Dupras Date: Fri, 13 Dec 2019 17:38:40 -0500 Subject: [PATCH] avra: first steps --- apps/zasm/README.md | 13 ++ apps/zasm/avr.asm | 263 ++++++++++++++++++++++++++++++++++++++++ apps/zasm/avr.txt | 134 ++++++++++++++++++++ apps/zasm/gluea.asm | 45 +++++++ apps/zasm/util.asm | 29 +++++ tools/emul/.gitignore | 1 + tools/emul/Makefile | 20 ++- tools/emul/README.md | 6 + tools/emul/zasm/zasm.c | 4 + tools/tests/Makefile | 3 +- tools/tests/avra/runtests.sh | 28 +++++ tools/tests/avra/test1.asm | 5 + tools/tests/avra/test1.expected | 1 + 13 files changed, 546 insertions(+), 6 deletions(-) create mode 100644 apps/zasm/avr.asm create mode 100644 apps/zasm/avr.txt create mode 100644 apps/zasm/gluea.asm create mode 100755 tools/tests/avra/runtests.sh create mode 100644 tools/tests/avra/test1.asm create mode 100644 tools/tests/avra/test1.expected diff --git a/apps/zasm/README.md b/apps/zasm/README.md index c2e4f3e..7b3f35d 100644 --- a/apps/zasm/README.md +++ b/apps/zasm/README.md @@ -165,4 +165,17 @@ using `IX` and `IY` as 8-bit registers. We used to support them, but because this makes our code incompatible with Z80-compatible CPUs such as the Z180, we prefer to avoid these in our code. +## AVR assembler + +`zasm` can be configured, at compile time, to be a AVR assembler instead of a +z80 assembler. Directives, literals, symbols, they're all the same, it's just +instructions and their arguments that change. + +Instructions and their arguments have a ayntax that is similar to other AVR +assemblers: registers are referred to as `rXX`, mnemonics are the same, +arguments are separated by commas. + +To assemble an AVR assembler, use the `gluea.asm` file instead of the regular +one. + [libz80]: https://github.com/ggambetta/libz80 diff --git a/apps/zasm/avr.asm b/apps/zasm/avr.asm new file mode 100644 index 0000000..50db214 --- /dev/null +++ b/apps/zasm/avr.asm @@ -0,0 +1,263 @@ +; Same thing as instr.asm, but for AVR instructions + +; *** Instructions table *** + +; List of mnemonic names separated by a null terminator. Their index in the +; list is their ID. Unlike in zasm, not all mnemonics have constant associated +; to it because it's generally not needed. This list is grouped by argument +; categories, and then alphabetically. Categories are ordered so that the 8bit +; opcodes come first, then the 16bit ones. 0xff ends the chain +instrNames: +; Rd(5) + Rr(5) +.equ I_ADC 0 +.db "ADC", 0 +.db "ADD", 0 +.db "AND", 0 +.db "CLR", 0 +.db "CP", 0 +.db "CPC", 0 +.db "CPSE", 0 +.db "EOR", 0 +.db "MOV", 0 +.db "MUL", 0 +.db "OR", 0 +.db "SBC", 0 +.db "SUB", 0 +; no arg +.equ I_BREAK 13 +.db "BREAK", 0 +.db "CLC", 0 +.db "CLH", 0 +.db "CLI", 0 +.db "CLN", 0 +.db "CLS", 0 +.db "CLT", 0 +.db "CLV", 0 +.db "CLZ", 0 +.db "EICALL", 0 +.db "EIJMP", 0 +.db "ICALL", 0 +.db "IJMP", 0 +.db "NOP", 0 +.db "RET", 0 +.db "RETI", 0 +.db "SEC", 0 +.db "SEH", 0 +.db "SEI", 0 +.db "SEN", 0 +.db "SES", 0 +.db "SET", 0 +.db "SEV", 0 +.db "SEZ", 0 +.db "SLEEP", 0 +.db "WDR", 0 +; Rd(5) +.equ I_ASR 39 +.db "ASR", 0 +.db "COM", 0 +.db "DEC", 0 +.db "INC", 0 +.db "LAC", 0 +.db "LAS", 0 +.db "LAT", 0 +.db "LSR", 0 +.db "NEG", 0 +.db "POP", 0 +.db "PUSH", 0 +.db "ROR", 0 +.db "SWAP", 0 +.db "XCH", 0 +.db 0xff + + +; 8-bit constant masks associated with each instruction. In the same order as +; in instrNames +instrUpMasks1: +; Rd(5) + Rd(5): XXXXXXrd ddddrrrr +.db 0b00011100 ; ADC +.db 0b00001100 ; ADD +.db 0b00100000 ; AND +.db 0b00100100 ; CLR +.db 0b00010100 ; CP +.db 0b00000100 ; CPC +.db 0b00010000 ; CPSE +.db 0b00100100 ; EOR +.db 0b00101100 ; MOV +.db 0b10011100 ; MUL +.db 0b00101000 ; OR +.db 0b00001000 ; SBC +.db 0b00011000 ; SUB + +; 16-bit constant masks associated with each instruction. In the same order as +; in instrNames +instrUpMasks2: +; no arg +.db 0b10010101, 0b10011000 ; BREAK +.db 0b10010100, 0b10001000 ; CLC +.db 0b10010100, 0b11011000 ; CLH +.db 0b10010100, 0b11111000 ; CLI +.db 0b10010100, 0b10101000 ; CLN +.db 0b10010100, 0b11001000 ; CLS +.db 0b10010100, 0b11101000 ; CLT +.db 0b10010100, 0b10111000 ; CLV +.db 0b10010100, 0b10011000 ; CLZ +.db 0b10010101, 0b00011001 ; EICALL +.db 0b10010100, 0b00011001 ; EIJMP +.db 0b10010101, 0b00001001 ; ICALL +.db 0b10010100, 0b00001001 ; IJMP +.db 0b00000000, 0b00000000 ; NOP +.db 0b10010101, 0b00001000 ; RET +.db 0b10010101, 0b00011000 ; RETI +.db 0b10010100, 0b00001000 ; SEC +.db 0b10010100, 0b01011000 ; SEH +.db 0b10010100, 0b01111000 ; SEI +.db 0b10010100, 0b00101000 ; SEN +.db 0b10010100, 0b01001000 ; SES +.db 0b10010100, 0b01101000 ; SET +.db 0b10010100, 0b00111000 ; SEV +.db 0b10010100, 0b00011000 ; SEZ +.db 0b10010101, 0b10001000 ; SLEEP +.db 0b10010101, 0b10101000 ; WDR +; Rd(5): XXXXXXXd ddddXXXX +.db 0b10010100, 0b00000101 ; ASR +.db 0b10010100, 0b00000000 ; COM +.db 0b10010100, 0b00001010 ; DEC +.db 0b10010100, 0b00000011 ; INC +.db 0b10010010, 0b00000110 ; LAC +.db 0b10010010, 0b00000101 ; LAS +.db 0b10010010, 0b00000111 ; LAT +.db 0b10010100, 0b00000110 ; LSR +.db 0b10010100, 0b00000001 ; NEG +.db 0b10010000, 0b00001111 ; POP +.db 0b10010010, 0b00001111 ; PUSH +.db 0b10010100, 0b00000111 ; ROR +.db 0b10010100, 0b00000010 ; SWAP +.db 0b10010010, 0b00000100 ; XCH + +; Same signature as getInstID in instr.asm +; Reads string in (HL) and returns the corresponding ID (I_*) in A. Sets Z if +; there's a match. +getInstID: + push bc + push hl + push de + ex de, hl ; DE makes a better needle + ; haystack. -1 because we inc HL at the beginning of the loop + ld hl, instrNames-1 + ld b, 0xff ; index counter +.loop: + inc b + inc hl + ld a, (hl) + inc a ; check if 0xff + jr z, .notFound + call strcmpIN + jr nz, .loop + ; found! + ld a, b ; index + cp a ; ensure Z +.end: + pop de + pop hl + pop bc + ret +.notFound: + dec a ; unset Z + jr .end + +; Same signature as parseInstruction in instr.asm +; Parse instruction specified in A (I_* const) with args in I/O and write +; resulting opcode(s) in I/O. +; Sets Z on success. On error, A contains an error code (ERR_*) +parseInstruction: + ; BC, during .spit, is ORred to the spitted opcode. + ld bc, 0 + cp I_BREAK + jr c, .spitRd5Rr5 + cp I_ASR + jr c, .spitNoArg + ; spitRd5 + ld d, a ; save A for later + call .readR5 + ret nz + call .placeRd + ld a, d ; restore A + ; continue to .spitNoArg +.spitNoArg: + call .getUp2 + jr .spit +.spitRd5Rr5: + ld d, a ; save A for later + call .readR5 + ret nz + call .placeRd + call readComma + call .readR5 + ret nz + push af ; --> lvl 1 + ; let's start with the 4 lower bits + and 0xf + or c + ; We now have our LSB in A. Let's spit it now. + call ioPutB + pop af ; <-- lvl 1 + ; and now that last high bit, currently bit 4, which must become bit 1 + and 0b00010000 + rra \ rra \ rra + or b + ld b, a + ld a, d ; restore A + ld hl, instrUpMasks1 + call addHL + ; now that's our MSB + jr .spitMSB +.spit: + ; LSB is spit *before* MSB + inc hl + ld a, (hl) + or c + call ioPutB + dec hl +.spitMSB: + ld a, (hl) + or b + call ioPutB + xor a ; ensure Z, set success + ret + +; local routines +; place number in A in BC at position .......d dddd.... +; BC is assumed to be 0 +.placeRd: + sla a \ rla \ rla \ rla ; last RLA might set carry + rl b + ld c, a + ret +; Fetch a 16-bit upcode specified by instr index in A and set that upcode in HL +.getUp2: + sub I_BREAK + sla a ; A * 2 + ld hl, instrUpMasks2 + jp addHL + +; read a rXX argument and return register number in A. +; Set Z for success. +.readR5: + call readWord + ld a, (hl) + call upcase + cp 'R' + ret nz ; not a register + inc hl + call parseDecimal + ret nz + push ix \ pop hl + ld a, h + or a + ret nz ; should be zero + ld a, l + cp 32 + jp nc, unsetZ ; must be < 32 + ; we're good! + cp a ; ensure Z + ret diff --git a/apps/zasm/avr.txt b/apps/zasm/avr.txt new file mode 100644 index 0000000..8dbca73 --- /dev/null +++ b/apps/zasm/avr.txt @@ -0,0 +1,134 @@ +The AVR instruction set is a bit more regular than z80's, which allows us for +simpler upcode spitting logic (simplicity which is lost when we need to take +into account all AVR models and instruction constraints on each models). This +file categorizes all available ops with their opcode signature. X means upcode +bit. + +Categories are in descending order of "popularity" + +Mnemonics with "*" are a bit special. + +### 16-bit + +## Plain + +XXXX XXXX XXXX XXXX + +BREAK, CLC, CLH, CLI, CLN, CLS, CLT, CLV, CLZ, EICALL, EIJMP, ELPM*, ICALL, +IJMP, NOP, RET, RETI, SEC, SEH, SEI, SEN, SES, SET, SEV, SEZ, SLEEP, SPM*, WDR + +## Rd(5) + +XXXX XXXd dddd XXXX + +ASR, COM, DEC, ELPM*, INC, LAC, LAS, LAT, LD*, LPM*, LSR, NEG, POP, PUSH, ROR, +ST*, SWAP, XCH + +## Rd(5) + Rr(5) + +XXXX XXrd dddd rrrr + +ADC, ADD, AND, CLR, CP, CPC, CPSE, EOR, MOV, MUL, OR, ROL*, SBC, SUB, +TST* + +## k(7) + +XXXX XXkk kkkk kXXX + +BRCC, BRCS, BREQ, BRGE, BRHC, BRHS, BRID, BRIE, BRLO, BRLT, BRMI, BRNE, BRPL, +BRSH, BRTC, BRTS, BRVC, BRVS + +## Rd(4) + K(8) + +XXXX KKKK dddd KKKK + +ANDI, CBR*, CPI, LDI, ORI, SBCI, SBR, SUBI + +## Rd(5) + bit + +XXXX XXXd dddd Xbbb + +BLD, BST, SBRC, SBRS + +## A(5) + bit + +XXXX XXXX AAAA Abbb + +CBI, SBI, SBIC, SBIS + +## Rd(3) + Rr(3) + +XXXX XXXX Xddd Xrrr + +FMUL, FMULS, FMULSU, MULSU + +## Rd(4) + Rr(4) + +XXXX XXXX dddd rrrr + +MOVW, MULS + +## Rd(5) + A(6) + +XXXX XAAd dddd AAAA + +IN, OUT + +## Rd(4) + k(7) + +XXXX Xkkk dddd kkkk + +LDS*, STS* + +## Rd(2) + K + +XXXX XXXX KKdd KKKK + +ADIW, SBIW + +## Rd(4) + +XXXX XXXX dddd XXXX + +SER + +## K(4) + +XXXX XXXX KKKK XXXX + +DES + +## k(12) + +XXXX kkkk kkkk kkkk + +RCALL, RJMP + +## SREG + +XXXX XXXX Xsss XXXX + +BCLR, BSET + +## SREG + k(7) + +XXXX XXkk kkkk ksss + +BRBC, BRBS + +### 32-bit + +## k(22) + +XXXX XXXk kkkk XXXk +kkkk kkkk kkkk kkkk + +CALL, JMP + +## Rd(5) + k(16) + +XXXX XXXd dddd XXXX +kkkk kkkk kkkk kkkk + +LDS*, STS* + diff --git a/apps/zasm/gluea.asm b/apps/zasm/gluea.asm new file mode 100644 index 0000000..e10073a --- /dev/null +++ b/apps/zasm/gluea.asm @@ -0,0 +1,45 @@ +; avra +; +; This glue code assembles as assembler for AVR microcontrollers. It looks a +; lot like zasm, but it spits AVR binary. Comments have been stripped, refer +; to glue.asm for details. + +.inc "user.h" + +; *** Overridable consts *** +.equ ZASM_REG_MAXCNT 0xff +.equ ZASM_LREG_MAXCNT 0x20 +.equ ZASM_REG_BUFSZ 0x700 +.equ ZASM_LREG_BUFSZ 0x100 + +; ****** + +.inc "err.h" +.inc "ascii.h" +.inc "blkdev.h" +.inc "fs.h" +jp zasmMain + +.inc "core.asm" +.inc "zasm/const.asm" +.inc "lib/util.asm" +.inc "lib/ari.asm" +.inc "lib/parse.asm" +.inc "lib/args.asm" +.inc "zasm/util.asm" +.equ IO_RAMSTART USER_RAMSTART +.inc "zasm/io.asm" +.equ TOK_RAMSTART IO_RAMEND +.inc "zasm/tok.asm" +.inc "zasm/avr.asm" +.equ DIREC_RAMSTART TOK_RAMEND +.inc "zasm/directive.asm" +.inc "zasm/parse.asm" +.equ EXPR_PARSE parseNumberOrSymbol +.inc "lib/expr.asm" +.equ SYM_RAMSTART DIREC_RAMEND +.inc "zasm/symbol.asm" +.equ ZASM_RAMSTART SYM_RAMEND +.inc "zasm/main.asm" +USER_RAMSTART: + diff --git a/apps/zasm/util.asm b/apps/zasm/util.asm index 262758b..4ad85f0 100644 --- a/apps/zasm/util.asm +++ b/apps/zasm/util.asm @@ -76,6 +76,35 @@ strncmpI: ; early, set otherwise) ret +; strcmp, then next. Same thing as strcmp, but case insensitive and if strings +; are not equal, make HL point to the character right after the null +; termination. We assume that the haystack (HL), has uppercase chars. +strcmpIN: + push de ; --> lvl 1 + push hl ; --> lvl 2 + +.loop: + ld a, (de) + call upcase + cp (hl) + jr nz, .notFound ; not equal? break early. + or a ; If our chars are null, stop the cmp + jr z, .found + inc hl + inc de + jr .loop +.found: + pop hl ; <-- lvl 2 + pop de ; <-- lvl 1 + ; Z already set + ret +.notFound: + ; Not found, we skip the string + call strskip + pop de ; <-- lvl 2, junk + pop de ; <-- lvl 1 + ret + ; If string at (HL) starts with ( and ends with ), "enter" into the parens ; (advance HL and put a null char at the end of the string) and set Z. ; Otherwise, do nothing and reset Z. diff --git a/tools/emul/.gitignore b/tools/emul/.gitignore index 2b9ca3c..db7da81 100644 --- a/tools/emul/.gitignore +++ b/tools/emul/.gitignore @@ -1,6 +1,7 @@ /shell/shell /bshell/shell /zasm/zasm +/zasm/avra /runbin/runbin /*/*-bin.h /*/*.bin diff --git a/tools/emul/Makefile b/tools/emul/Makefile index b77838a..79b1342 100644 --- a/tools/emul/Makefile +++ b/tools/emul/Makefile @@ -3,6 +3,7 @@ TARGETS = shell/shell zasm/zasm runbin/runbin KERNEL = ../../kernel APPS = ../../apps ZASMBIN = zasm/zasm +AVRABIN = zasm/avra ZASMSH = ../zasm.sh SHELLAPPS = zasm ed SHELLTGTS = ${SHELLAPPS:%=cfsin/%} @@ -10,7 +11,7 @@ CFSIN_CONTENTS = $(SHELLTGTS) cfsin/user.h OBJS = emul.o libz80/libz80.o .PHONY: all -all: $(TARGETS) $(CFSIN_CONTENTS) +all: $(TARGETS) $(AVRABIN) $(CFSIN_CONTENTS) # -o in sync with SHELL_CODE in shell/glue.asm shell/shell.bin: shell/glue.asm $(ZASMBIN) @@ -19,18 +20,27 @@ shell/shell.bin: shell/glue.asm $(ZASMBIN) shell/shell-bin.h: shell/shell.bin ./bin2c.sh KERNEL < shell/shell.bin | tee $@ > /dev/null +shell/shell: shell/shell.c $(OBJS) shell/shell-bin.h + $(CC) shell/shell.c $(OBJS) -o $@ + zasm/kernel-bin.h: zasm/kernel.bin ./bin2c.sh KERNEL < zasm/kernel.bin | tee $@ > /dev/null zasm/zasm-bin.h: zasm/zasm.bin ./bin2c.sh USERSPACE < zasm/zasm.bin | tee $@ > /dev/null -shell/shell: shell/shell.c $(OBJS) shell/shell-bin.h - $(CC) shell/shell.c $(OBJS) -o $@ - $(ZASMBIN): zasm/zasm.c $(OBJS) zasm/kernel-bin.h zasm/zasm-bin.h $(CFSPACK) $(CC) zasm/zasm.c $(OBJS) -o $@ +zasm/avra.bin: $(ZASMBIN) + $(ZASMSH) $(KERNEL) $(APPS) zasm/user.h < $(APPS)/zasm/gluea.asm > $@ + +zasm/avra-bin.h: zasm/avra.bin + ./bin2c.sh USERSPACE < zasm/avra.bin | tee $@ > /dev/null + +$(AVRABIN): zasm/zasm.c $(OBJS) zasm/kernel-bin.h zasm/avra-bin.h + $(CC) -D AVRA zasm/zasm.c $(OBJS) -o $@ + runbin/runbin: runbin/runbin.c $(OBJS) $(CC) runbin/runbin.c $(OBJS) -o $@ @@ -52,7 +62,7 @@ cfsin/user.h: shell/user.h cp shell/user.h $@ .PHONY: updatebootstrap -updatebootstrap: $(ZASMBIN) $(INCCFS) +updatebootstrap: $(ZASMBIN) $(ZASMSH) $(KERNEL) < zasm/glue.asm > zasm/kernel.bin $(ZASMSH) $(KERNEL) $(APPS) zasm/user.h < $(APPS)/zasm/glue.asm > zasm/zasm.bin diff --git a/tools/emul/README.md b/tools/emul/README.md index cb664bf..d555bb1 100644 --- a/tools/emul/README.md +++ b/tools/emul/README.md @@ -60,6 +60,12 @@ Those binaries can be updated with the `make updatebootstrap` command. If they are up-to date and that zasm isn't broken, this command should output the same binary as before. +## avra + +In the `zasm` folder, there's also `avra` which is a zasm compiled as an AVR +assembler. It works the same way as zasm except it expects AVR mnemonics and +spits AVR binaries. + ## runbin This is a very simple tool that reads binary z80 code from stdin, loads it in diff --git a/tools/emul/zasm/zasm.c b/tools/emul/zasm/zasm.c index f65af8f..ccb8c33 100644 --- a/tools/emul/zasm/zasm.c +++ b/tools/emul/zasm/zasm.c @@ -3,7 +3,11 @@ #include #include "../emul.h" #include "kernel-bin.h" +#ifdef AVRA +#include "avra-bin.h" +#else #include "zasm-bin.h" +#endif /* zasm reads from a specified blkdev, assemble the file and writes the result * in another specified blkdev. In our emulator layer, we use stdin and stdout diff --git a/tools/tests/Makefile b/tools/tests/Makefile index 941ce75..dec94c9 100644 --- a/tools/tests/Makefile +++ b/tools/tests/Makefile @@ -3,9 +3,10 @@ CFSPACK = ../cfspack/cfspack .PHONY: run run: - $(MAKE) -C $(EMULDIR) zasm/zasm runbin/runbin shell/shell + $(MAKE) -C $(EMULDIR) zasm/zasm zasm/avra runbin/runbin shell/shell cd unit && ./runtests.sh cd zasm && ./runtests.sh + cd avra && ./runtests.sh cd shell && ./runtests.sh $(CFSPACK): diff --git a/tools/tests/avra/runtests.sh b/tools/tests/avra/runtests.sh new file mode 100755 index 0000000..99b8ffa --- /dev/null +++ b/tools/tests/avra/runtests.sh @@ -0,0 +1,28 @@ +#!/bin/sh -e + +AVRA=../../emul/zasm/avra + +cmpas() { + FN=$1 + EXPECTED=$(xxd ${FN%.*}.expected) + ACTUAL=$(cat ${FN} | $AVRA | xxd) + if [ "$ACTUAL" = "$EXPECTED" ]; then + echo ok + else + echo actual + echo "$ACTUAL" + echo expected + echo "$EXPECTED" + exit 1 + fi +} + +if [ ! -z $1 ]; then + cmpas $1 + exit 0 +fi + +for fn in *.asm; do + echo "Comparing ${fn}" + cmpas $fn +done diff --git a/tools/tests/avra/test1.asm b/tools/tests/avra/test1.asm new file mode 100644 index 0000000..5807c81 --- /dev/null +++ b/tools/tests/avra/test1.asm @@ -0,0 +1,5 @@ +add r1, r31 +ret +sleep +break +asr r20 diff --git a/tools/tests/avra/test1.expected b/tools/tests/avra/test1.expected new file mode 100644 index 0000000..0c60f67 --- /dev/null +++ b/tools/tests/avra/test1.expected @@ -0,0 +1 @@ +•ˆ•˜•E• \ No newline at end of file