zasm: big I/O overhaul

Instead of buffering input in memory one line at a time, we go in "just
in time" mode and always read contents directly from I/O, without
buffering.

It forces us to implement a `ioPutback` scheme, but on the other hand it
greatly simplifies cases where multiple tokens are on the same line
(when a label is directly followed by an instruction).

The end result feels much more solid and less hackish.
This commit is contained in:
Virgil Dupras 2019-05-16 07:53:42 -04:00
parent 34ff0a6c2a
commit e9244b80ee
5 changed files with 164 additions and 144 deletions

View File

@ -24,7 +24,6 @@ directiveHandlers:
handleDB:
push hl
call toWord
call readWord
ld hl, scratchpad
call parseLiteral
@ -36,7 +35,6 @@ handleDB:
handleDW:
push hl
call toWord
call readWord
ld hl, scratchpad
call parseExpr
@ -51,7 +49,9 @@ handleDW:
handleEQU:
call zasmIsFirstPass
jr nz, .begin
; first pass? .equ are noops
; first pass? .equ are noops Consume args and return
call readWord
call readWord
xor a
ret
.begin:
@ -59,19 +59,14 @@ handleEQU:
push de
push bc
; Read our constant name
call toWord
call readWord
; We can't register our symbol yet: we don't have our value!
; Let's copy it over.
push hl
ld hl, scratchpad
ld de, DIREC_SCRATCHPAD
ld bc, SCRATCHPAD_SIZE
ldir
pop hl
; Now, read the value associated to it
call toWord
call readWord
ld hl, scratchpad
call parseExpr
@ -102,7 +97,7 @@ getDirectiveID:
pop bc
ret
; Parse directive specified in A (D_* const) with args in (HL) and act in
; Parse directive specified in A (D_* const) with args in I/O and act in
; an appropriate manner. If the directive results in writing data at its
; current location, that data is in (direcData) and A is the number of bytes
; in it.

View File

@ -708,16 +708,14 @@ getUpcode:
pop ix
ret
; Parse next argument in string (HL) and place it in (DE)
; Parse next argument in I/O and place it in (DE)
; Sets Z on success, reset on error.
processArg:
call toWord
call readWord
; Read word is in scratchpad, HL is properly advanced. Now, let's push
jr nz, .noarg
; Read word is in (HL). Now, let's push
; that HL value and replace it with (scratchpad) so that we can parse
; that arg.
push hl
ld hl, scratchpad
call parseArg
cp 0xff
@ -733,15 +731,18 @@ processArg:
ld a, ixh
ld (de), a
cp a ; ensure Z is set
jr .end
ret
.error:
call JUMP_UNSETZ
.end:
pop hl
ret
.noarg:
xor a
ld (de), a
ret
; Parse instruction specified in A (I_* const) with args in (HL) and write
; resulting opcode(s) in (instrUpcode). Returns the number of bytes written in A.
; Parse instruction specified in A (I_* const) with args in I/O and write
; resulting opcode(s) in (instrUpcode). Returns the number of bytes written in
; A.
parseInstruction:
push bc
push hl

View File

@ -1,5 +1,3 @@
; *** Consts ***
.equ IO_MAX_LINELEN 0xff
; *** Variables ***
.equ IO_IN_GETC IO_RAMSTART
.equ IO_IN_PUTC IO_IN_GETC+2
@ -9,14 +7,36 @@
.equ IO_OUT_PUTC IO_OUT_GETC+2
.equ IO_OUT_SEEK IO_OUT_PUTC+2
.equ IO_OUT_TELL IO_OUT_SEEK+2
.equ IO_LINEBUF IO_OUT_TELL+2
.equ IO_RAMEND IO_LINEBUF+IO_MAX_LINELEN+1
; see ioPutBack below
.equ IO_PUTBACK_BUF IO_OUT_TELL+2
.equ IO_RAMEND IO_PUTBACK_BUF+1
; *** Code ***
ioInit:
xor a
ld (IO_PUTBACK_BUF), a
ret
ioGetC:
ld a, (IO_PUTBACK_BUF)
or a ; cp 0
jr nz, .getback
ld ix, (IO_IN_GETC)
jp (ix)
.getback:
push af
xor a
ld (IO_PUTBACK_BUF), a
pop af
ret
; Put back non-zero character A into the "ioGetC stack". The next ioGetC call,
; instead of reading from IO_IN_GETC, will return that character. That's the
; easiest way I found to handle the readWord/gotoNextLine problem.
ioPutBack:
ld (IO_PUTBACK_BUF), a
ret
ioPutC:
ld ix, (IO_OUT_PUTC)
@ -26,54 +46,3 @@ ioSeek:
ld ix, (IO_IN_SEEK)
jp (ix)
; Sets Z is A is CR, LF, or null.
isLineEnd:
or a ; same as cp 0
ret z
cp 0x0d
ret z
cp 0x0a
ret z
cp '\'
ret
; Read a single line from ioGetCPtr and place it in IO_LINEBUF.
; Returns number of chars read in A. 0 means we're at the end of our input
; stream, which happens when GetC unsets Z. Make HL point to IO_LINEBUF.
; We ignore empty lines and pass through them like butter.
; A null char is written at the end of the line.
ioReadLine:
push bc
; consume ioGetC as long as it yields a line end char.
.loop1:
call ioGetC
jr nz, .eof ; GetC unsets Z? We don't have a line to read,
; we have EOF.
call isLineEnd
jr z, .loop1
; A contains the first char of our line.
ld c, 1
ld (IO_LINEBUF), a
ld hl, IO_LINEBUF+1
.loop2:
call ioGetC
call isLineEnd
jr z, .success ; We have end of line
ld (hl), a
inc hl
inc c
jr .loop2
.success:
; write null char at HL before we return
xor a
ld (hl), a
ld a, c
ld hl, IO_LINEBUF
jr .end
.eof:
xor a
.end:
pop bc
ret

View File

@ -67,6 +67,7 @@ zasmMain:
ld de, IO_OUT_GETC
call JUMP_BLKSEL
; Init modules
call ioInit
call symInit
; First pass
@ -105,17 +106,18 @@ zasmParseFile:
ld (ZASM_PC), de
.loop:
inc de
call ioReadLine
or a ; is A 0?
ret z ; We have EOF
call parseLine
ret nz ; error
ld a, b ; TOK_*
cp TOK_EOF
ret z ; if EOF, return now with success
jr .loop
; Parse line in (HL), write the resulting opcode(s) through ioPutC and increases
; (ZASM_PC) by the number of bytes written. BC is set to the result of the call
; to tokenize.
; Sets Z if parse was successful, unset if there was an error or EOF.
; Parse next token and accompanying args (when relevant) in I/O, write the
; resulting opcode(s) through ioPutC and increases (ZASM_PC) by the number of
; bytes written. BC is set to the result of the call to tokenize.
; Sets Z if parse was successful, unset if there was an error. EOF is not an
; error.
parseLine:
call tokenize
ld a, b ; TOK_*
@ -124,22 +126,11 @@ parseLine:
cp TOK_DIRECTIVE
jp z, _parseDirec
cp TOK_LABEL
jr z, .label
cp TOK_EMPTY
ret ; Z is correct. If empty, Z is set and not an
jr z, _parseLabel
cp TOK_EOF
ret ; Z is correct. If EOF, Z is set and not an
; error, otherwise, it means bad token and
; errors out.
.label:
push hl
call _parseLabel
pop hl
ret nz ; error out
; We're finished here. However, because it's a label, it's possible that
; another logical line follows directly after the label. Let's parse
; this and propagate error.
call parseLine
; Z has proper value
ret
_parseInstr:
ld a, c ; I_*

View File

@ -2,7 +2,7 @@
TOK_INSTR .equ 0x01
TOK_DIRECTIVE .equ 0x02
TOK_LABEL .equ 0x03
TOK_EMPTY .equ 0xfe ; not a bad token, just an empty line
TOK_EOF .equ 0xfe ; end of file
TOK_BAD .equ 0xff
.equ SCRATCHPAD_SIZE 0x20
@ -16,7 +16,17 @@ scratchpad:
isLineEndOrComment:
cp ';'
ret z
or a ; cp 0
; continue to isLineEnd
; Sets Z is A is CR, LF, or null.
isLineEnd:
or a ; same as cp 0
ret z
cp 0x0d
ret z
cp 0x0a
ret z
cp '\'
ret
; Sets Z is A is ' ' '\t' or ','
@ -32,8 +42,7 @@ isSep:
isSepOrLineEnd:
call isSep
ret z
call isLineEndOrComment
ret
jr isLineEndOrComment
; Checks whether string at (HL) is a label, that is, whether it ends with a ":"
; Sets Z if yes, unset if no.
@ -63,48 +72,98 @@ isLabel:
pop hl
ret
; read word in (HL) and put it in (scratchpad), null terminated, for a maximum
; of SCRATCHPAD_SIZE-1 characters. As a result, A is the read length. HL is
; advanced to the next separator char.
; Read ioGetC until a word starts, then read ioGetC as long as there is no
; separator and put that contents in (scratchpad), null terminated, for a
; maximum of SCRATCHPAD_SIZE-1 characters.
; If EOL (\n, \r or comment) or EOF is hit before we could read a word, we stop
; right there. If scratchpad is not big enough, we stop right there and error.
; HL points to scratchpad
; Sets Z if a word could be read, unsets if not.
readWord:
push bc
push de
ld de, scratchpad
ld b, SCRATCHPAD_SIZE-1
.loop:
ld a, (hl)
call isSepOrLineEnd
jr z, .success
ld (de), a
inc hl
inc de
djnz .loop
.success:
xor a
ld (de), a
ld a, SCRATCHPAD_SIZE-1
sub a, b
.end:
pop de
pop bc
ret
; (HL) being a string, advance it to the next non-sep character.
; Set Z if we could do it before the line ended, reset Z if we couldn't.
toWord:
.loop:
ld a, (hl)
; Get to word
.loop1:
call ioGetC
call isLineEndOrComment
jr z, .error
call isSep
jr nz, .success
jr nz, .read
jr .loop1
.read:
ld hl, scratchpad
ld b, SCRATCHPAD_SIZE-1
; A contains the first letter to read
.loop2:
ld (hl), a
inc hl
jr .loop
call ioGetC
call isSepOrLineEnd
jr z, .success
djnz .loop2
; out of space. error.
.error:
; We need to put the last char we've read back so that gotoNextLine
; behaves properly.
call ioPutBack
call JUMP_UNSETZ
ret
jr .end
.success:
xor a ; ensure Z
call ioPutBack
; null-terminate scratchpad
xor a
ld (hl), a
ld hl, scratchpad
.end:
pop bc
ret
; Read ioGetC until we reach the beginning of next line, skipping comments if
; necessary. This skips all whitespace, \n, \r, comments until we reach the
; first non-comment character. Then, we put it back (ioPutBack) and return.
;
; If gotoNextLine encounters anything else than whitespace, comment or line
; separator, we error out (no putback)
; Sets Z if we reached a new line. Unset if EOF or error.
gotoNextLine:
.loop1:
; first loop is "strict", that is: we error out on non-whitespace.
call ioGetC
call isSepOrLineEnd
ret nz ; error
or a ; cp 0
jr z, .eof
call isLineEnd
jr z, .loop3 ; good!
cp ';'
jr z, .loop2 ; comment starting, go to "fast lane"
jr .loop1
.loop2:
; second loop is the "comment loop": anything is valid and we just run
; until EOL.
call ioGetC
or a ; cp 0
jr z, .eof
cp '\' ; special case: '\' doesn't count as a line end
; in a comment.
jr z, .loop2
call isLineEnd
jr z, .loop3
jr .loop2
.loop3:
; Loop 3 happens after we reach our first line sep. This means that we
; wade through whitespace until we reach a non-whitespace character.
call ioGetC
or a ; cp 0
jr z, .eof
cp ';'
jr z, .loop2 ; oh, another comment! go back to loop2!
call isSepOrLineEnd
jr z, .loop3
; Non-whitespace. That's our goal! Put it back
call ioPutBack
.eof:
cp a ; ensure Z
ret
; Parse line in (HL) and read the next token in BC. The token is written on
@ -113,17 +172,27 @@ toWord:
; Advance HL to after the read word.
; If no token matches, TOK_BAD is written to B
tokenize:
call toWord
jr nz, .emptyline
call readWord
push hl ; Save advanced HL for later
ld hl, scratchpad
jr z, .process ; read successful, process into token.
; Error. It could be EOL, EOF or scraptchpad size problem
; Whatever it is, calling gotoNextLine is appropriate. If it's EOL
; that's obviously what we want to do. If it's EOF, we can check
; it after. If it's a scratchpad overrun, gotoNextLine handles it.
call gotoNextLine
jr nz, .error
or a ; Are we EOF?
jr nz, tokenize ; not EOF? then continue!
; We're EOF
ld b, TOK_EOF
ret
.process:
call isLabel
jr z, .label
call getInstID
jr z, .instr
call getDirectiveID
jr z, .direc
.error:
; no match
ld b, TOK_BAD
jr .end
@ -137,9 +206,4 @@ tokenize:
ld b, TOK_LABEL
.end:
ld c, a
pop hl
ret
.emptyline:
ld b, TOK_EMPTY
; no HL to pop, we jumped before the push
ret