From e9244b80ee1585c1a74d0fac0f5c16f28a7d1ab2 Mon Sep 17 00:00:00 2001 From: Virgil Dupras Date: Thu, 16 May 2019 07:53:42 -0400 Subject: [PATCH] zasm: big I/O overhaul Instead of buffering input in memory one line at a time, we go in "just in time" mode and always read contents directly from I/O, without buffering. It forces us to implement a `ioPutback` scheme, but on the other hand it greatly simplifies cases where multiple tokens are on the same line (when a label is directly followed by an instruction). The end result feels much more solid and less hackish. --- apps/zasm/directive.asm | 19 +++--- apps/zasm/instr.asm | 21 +++---- apps/zasm/io.asm | 79 ++++++++---------------- apps/zasm/main.asm | 33 ++++------ apps/zasm/tok.asm | 156 ++++++++++++++++++++++++++++++++++-------------- 5 files changed, 164 insertions(+), 144 deletions(-) diff --git a/apps/zasm/directive.asm b/apps/zasm/directive.asm index 29c1e9a..5082ed0 100644 --- a/apps/zasm/directive.asm +++ b/apps/zasm/directive.asm @@ -24,7 +24,6 @@ directiveHandlers: handleDB: push hl - call toWord call readWord ld hl, scratchpad call parseLiteral @@ -36,7 +35,6 @@ handleDB: handleDW: push hl - call toWord call readWord ld hl, scratchpad call parseExpr @@ -51,7 +49,9 @@ handleDW: handleEQU: call zasmIsFirstPass jr nz, .begin - ; first pass? .equ are noops + ; first pass? .equ are noops Consume args and return + call readWord + call readWord xor a ret .begin: @@ -59,19 +59,14 @@ handleEQU: push de push bc ; Read our constant name - call toWord call readWord ; We can't register our symbol yet: we don't have our value! ; Let's copy it over. - push hl - ld hl, scratchpad - ld de, DIREC_SCRATCHPAD - ld bc, SCRATCHPAD_SIZE - ldir - pop hl + ld de, DIREC_SCRATCHPAD + ld bc, SCRATCHPAD_SIZE + ldir ; Now, read the value associated to it - call toWord call readWord ld hl, scratchpad call parseExpr @@ -102,7 +97,7 @@ getDirectiveID: pop bc ret -; Parse directive specified in A (D_* const) with args in (HL) and act in +; Parse directive specified in A (D_* const) with args in I/O and act in ; an appropriate manner. If the directive results in writing data at its ; current location, that data is in (direcData) and A is the number of bytes ; in it. diff --git a/apps/zasm/instr.asm b/apps/zasm/instr.asm index 33ce03b..c40a1a5 100644 --- a/apps/zasm/instr.asm +++ b/apps/zasm/instr.asm @@ -708,16 +708,14 @@ getUpcode: pop ix ret -; Parse next argument in string (HL) and place it in (DE) +; Parse next argument in I/O and place it in (DE) ; Sets Z on success, reset on error. processArg: - call toWord call readWord - ; Read word is in scratchpad, HL is properly advanced. Now, let's push + jr nz, .noarg + ; Read word is in (HL). Now, let's push ; that HL value and replace it with (scratchpad) so that we can parse ; that arg. - push hl - ld hl, scratchpad call parseArg cp 0xff @@ -733,15 +731,18 @@ processArg: ld a, ixh ld (de), a cp a ; ensure Z is set - jr .end + ret .error: call JUMP_UNSETZ -.end: - pop hl + ret +.noarg: + xor a + ld (de), a ret -; Parse instruction specified in A (I_* const) with args in (HL) and write -; resulting opcode(s) in (instrUpcode). Returns the number of bytes written in A. +; Parse instruction specified in A (I_* const) with args in I/O and write +; resulting opcode(s) in (instrUpcode). Returns the number of bytes written in +; A. parseInstruction: push bc push hl diff --git a/apps/zasm/io.asm b/apps/zasm/io.asm index 7d8bdd8..00d6b41 100644 --- a/apps/zasm/io.asm +++ b/apps/zasm/io.asm @@ -1,5 +1,3 @@ -; *** Consts *** -.equ IO_MAX_LINELEN 0xff ; *** Variables *** .equ IO_IN_GETC IO_RAMSTART .equ IO_IN_PUTC IO_IN_GETC+2 @@ -9,14 +7,36 @@ .equ IO_OUT_PUTC IO_OUT_GETC+2 .equ IO_OUT_SEEK IO_OUT_PUTC+2 .equ IO_OUT_TELL IO_OUT_SEEK+2 -.equ IO_LINEBUF IO_OUT_TELL+2 -.equ IO_RAMEND IO_LINEBUF+IO_MAX_LINELEN+1 +; see ioPutBack below +.equ IO_PUTBACK_BUF IO_OUT_TELL+2 +.equ IO_RAMEND IO_PUTBACK_BUF+1 ; *** Code *** +ioInit: + xor a + ld (IO_PUTBACK_BUF), a + ret + ioGetC: + ld a, (IO_PUTBACK_BUF) + or a ; cp 0 + jr nz, .getback ld ix, (IO_IN_GETC) jp (ix) +.getback: + push af + xor a + ld (IO_PUTBACK_BUF), a + pop af + ret + +; Put back non-zero character A into the "ioGetC stack". The next ioGetC call, +; instead of reading from IO_IN_GETC, will return that character. That's the +; easiest way I found to handle the readWord/gotoNextLine problem. +ioPutBack: + ld (IO_PUTBACK_BUF), a + ret ioPutC: ld ix, (IO_OUT_PUTC) @@ -26,54 +46,3 @@ ioSeek: ld ix, (IO_IN_SEEK) jp (ix) -; Sets Z is A is CR, LF, or null. -isLineEnd: - or a ; same as cp 0 - ret z - cp 0x0d - ret z - cp 0x0a - ret z - cp '\' - ret - -; Read a single line from ioGetCPtr and place it in IO_LINEBUF. -; Returns number of chars read in A. 0 means we're at the end of our input -; stream, which happens when GetC unsets Z. Make HL point to IO_LINEBUF. -; We ignore empty lines and pass through them like butter. -; A null char is written at the end of the line. -ioReadLine: - push bc - ; consume ioGetC as long as it yields a line end char. -.loop1: - call ioGetC - jr nz, .eof ; GetC unsets Z? We don't have a line to read, - ; we have EOF. - call isLineEnd - jr z, .loop1 - ; A contains the first char of our line. - ld c, 1 - ld (IO_LINEBUF), a - ld hl, IO_LINEBUF+1 -.loop2: - call ioGetC - call isLineEnd - jr z, .success ; We have end of line - ld (hl), a - inc hl - inc c - jr .loop2 - -.success: - ; write null char at HL before we return - xor a - ld (hl), a - ld a, c - ld hl, IO_LINEBUF - jr .end -.eof: - xor a -.end: - pop bc - ret - diff --git a/apps/zasm/main.asm b/apps/zasm/main.asm index 71f815e..d9960b3 100644 --- a/apps/zasm/main.asm +++ b/apps/zasm/main.asm @@ -67,6 +67,7 @@ zasmMain: ld de, IO_OUT_GETC call JUMP_BLKSEL ; Init modules + call ioInit call symInit ; First pass @@ -105,17 +106,18 @@ zasmParseFile: ld (ZASM_PC), de .loop: inc de - call ioReadLine - or a ; is A 0? - ret z ; We have EOF call parseLine ret nz ; error + ld a, b ; TOK_* + cp TOK_EOF + ret z ; if EOF, return now with success jr .loop -; Parse line in (HL), write the resulting opcode(s) through ioPutC and increases -; (ZASM_PC) by the number of bytes written. BC is set to the result of the call -; to tokenize. -; Sets Z if parse was successful, unset if there was an error or EOF. +; Parse next token and accompanying args (when relevant) in I/O, write the +; resulting opcode(s) through ioPutC and increases (ZASM_PC) by the number of +; bytes written. BC is set to the result of the call to tokenize. +; Sets Z if parse was successful, unset if there was an error. EOF is not an +; error. parseLine: call tokenize ld a, b ; TOK_* @@ -124,22 +126,11 @@ parseLine: cp TOK_DIRECTIVE jp z, _parseDirec cp TOK_LABEL - jr z, .label - cp TOK_EMPTY - ret ; Z is correct. If empty, Z is set and not an + jr z, _parseLabel + cp TOK_EOF + ret ; Z is correct. If EOF, Z is set and not an ; error, otherwise, it means bad token and ; errors out. -.label: - push hl - call _parseLabel - pop hl - ret nz ; error out - ; We're finished here. However, because it's a label, it's possible that - ; another logical line follows directly after the label. Let's parse - ; this and propagate error. - call parseLine - ; Z has proper value - ret _parseInstr: ld a, c ; I_* diff --git a/apps/zasm/tok.asm b/apps/zasm/tok.asm index 76f06bd..e0012e1 100644 --- a/apps/zasm/tok.asm +++ b/apps/zasm/tok.asm @@ -2,7 +2,7 @@ TOK_INSTR .equ 0x01 TOK_DIRECTIVE .equ 0x02 TOK_LABEL .equ 0x03 -TOK_EMPTY .equ 0xfe ; not a bad token, just an empty line +TOK_EOF .equ 0xfe ; end of file TOK_BAD .equ 0xff .equ SCRATCHPAD_SIZE 0x20 @@ -16,7 +16,17 @@ scratchpad: isLineEndOrComment: cp ';' ret z - or a ; cp 0 + ; continue to isLineEnd + +; Sets Z is A is CR, LF, or null. +isLineEnd: + or a ; same as cp 0 + ret z + cp 0x0d + ret z + cp 0x0a + ret z + cp '\' ret ; Sets Z is A is ' ' '\t' or ',' @@ -32,8 +42,7 @@ isSep: isSepOrLineEnd: call isSep ret z - call isLineEndOrComment - ret + jr isLineEndOrComment ; Checks whether string at (HL) is a label, that is, whether it ends with a ":" ; Sets Z if yes, unset if no. @@ -63,48 +72,98 @@ isLabel: pop hl ret -; read word in (HL) and put it in (scratchpad), null terminated, for a maximum -; of SCRATCHPAD_SIZE-1 characters. As a result, A is the read length. HL is -; advanced to the next separator char. +; Read ioGetC until a word starts, then read ioGetC as long as there is no +; separator and put that contents in (scratchpad), null terminated, for a +; maximum of SCRATCHPAD_SIZE-1 characters. +; If EOL (\n, \r or comment) or EOF is hit before we could read a word, we stop +; right there. If scratchpad is not big enough, we stop right there and error. +; HL points to scratchpad +; Sets Z if a word could be read, unsets if not. readWord: push bc - push de - ld de, scratchpad - ld b, SCRATCHPAD_SIZE-1 -.loop: - ld a, (hl) - call isSepOrLineEnd - jr z, .success - ld (de), a - inc hl - inc de - djnz .loop -.success: - xor a - ld (de), a - ld a, SCRATCHPAD_SIZE-1 - sub a, b -.end: - pop de - pop bc - ret - -; (HL) being a string, advance it to the next non-sep character. -; Set Z if we could do it before the line ended, reset Z if we couldn't. -toWord: -.loop: - ld a, (hl) + ; Get to word +.loop1: + call ioGetC call isLineEndOrComment jr z, .error call isSep - jr nz, .success + jr nz, .read + jr .loop1 +.read: + ld hl, scratchpad + ld b, SCRATCHPAD_SIZE-1 + ; A contains the first letter to read +.loop2: + ld (hl), a inc hl - jr .loop + call ioGetC + call isSepOrLineEnd + jr z, .success + djnz .loop2 + ; out of space. error. .error: + ; We need to put the last char we've read back so that gotoNextLine + ; behaves properly. + call ioPutBack call JUMP_UNSETZ - ret + jr .end .success: - xor a ; ensure Z + call ioPutBack + ; null-terminate scratchpad + xor a + ld (hl), a + ld hl, scratchpad +.end: + pop bc + ret + +; Read ioGetC until we reach the beginning of next line, skipping comments if +; necessary. This skips all whitespace, \n, \r, comments until we reach the +; first non-comment character. Then, we put it back (ioPutBack) and return. +; +; If gotoNextLine encounters anything else than whitespace, comment or line +; separator, we error out (no putback) + +; Sets Z if we reached a new line. Unset if EOF or error. +gotoNextLine: +.loop1: + ; first loop is "strict", that is: we error out on non-whitespace. + call ioGetC + call isSepOrLineEnd + ret nz ; error + or a ; cp 0 + jr z, .eof + call isLineEnd + jr z, .loop3 ; good! + cp ';' + jr z, .loop2 ; comment starting, go to "fast lane" + jr .loop1 +.loop2: + ; second loop is the "comment loop": anything is valid and we just run + ; until EOL. + call ioGetC + or a ; cp 0 + jr z, .eof + cp '\' ; special case: '\' doesn't count as a line end + ; in a comment. + jr z, .loop2 + call isLineEnd + jr z, .loop3 + jr .loop2 +.loop3: + ; Loop 3 happens after we reach our first line sep. This means that we + ; wade through whitespace until we reach a non-whitespace character. + call ioGetC + or a ; cp 0 + jr z, .eof + cp ';' + jr z, .loop2 ; oh, another comment! go back to loop2! + call isSepOrLineEnd + jr z, .loop3 + ; Non-whitespace. That's our goal! Put it back + call ioPutBack +.eof: + cp a ; ensure Z ret ; Parse line in (HL) and read the next token in BC. The token is written on @@ -113,17 +172,27 @@ toWord: ; Advance HL to after the read word. ; If no token matches, TOK_BAD is written to B tokenize: - call toWord - jr nz, .emptyline call readWord - push hl ; Save advanced HL for later - ld hl, scratchpad + jr z, .process ; read successful, process into token. + ; Error. It could be EOL, EOF or scraptchpad size problem + ; Whatever it is, calling gotoNextLine is appropriate. If it's EOL + ; that's obviously what we want to do. If it's EOF, we can check + ; it after. If it's a scratchpad overrun, gotoNextLine handles it. + call gotoNextLine + jr nz, .error + or a ; Are we EOF? + jr nz, tokenize ; not EOF? then continue! + ; We're EOF + ld b, TOK_EOF + ret +.process: call isLabel jr z, .label call getInstID jr z, .instr call getDirectiveID jr z, .direc +.error: ; no match ld b, TOK_BAD jr .end @@ -137,9 +206,4 @@ tokenize: ld b, TOK_LABEL .end: ld c, a - pop hl - ret -.emptyline: - ld b, TOK_EMPTY - ; no HL to pop, we jumped before the push ret