zasm: big I/O overhaul

Instead of buffering input in memory one line at a time, we go in "just
in time" mode and always read contents directly from I/O, without
buffering.

It forces us to implement a `ioPutback` scheme, but on the other hand it
greatly simplifies cases where multiple tokens are on the same line
(when a label is directly followed by an instruction).

The end result feels much more solid and less hackish.
This commit is contained in:
Virgil Dupras 2019-05-16 07:53:42 -04:00
parent 34ff0a6c2a
commit e9244b80ee
5 changed files with 164 additions and 144 deletions

View File

@ -24,7 +24,6 @@ directiveHandlers:
handleDB: handleDB:
push hl push hl
call toWord
call readWord call readWord
ld hl, scratchpad ld hl, scratchpad
call parseLiteral call parseLiteral
@ -36,7 +35,6 @@ handleDB:
handleDW: handleDW:
push hl push hl
call toWord
call readWord call readWord
ld hl, scratchpad ld hl, scratchpad
call parseExpr call parseExpr
@ -51,7 +49,9 @@ handleDW:
handleEQU: handleEQU:
call zasmIsFirstPass call zasmIsFirstPass
jr nz, .begin jr nz, .begin
; first pass? .equ are noops ; first pass? .equ are noops Consume args and return
call readWord
call readWord
xor a xor a
ret ret
.begin: .begin:
@ -59,19 +59,14 @@ handleEQU:
push de push de
push bc push bc
; Read our constant name ; Read our constant name
call toWord
call readWord call readWord
; We can't register our symbol yet: we don't have our value! ; We can't register our symbol yet: we don't have our value!
; Let's copy it over. ; Let's copy it over.
push hl ld de, DIREC_SCRATCHPAD
ld hl, scratchpad ld bc, SCRATCHPAD_SIZE
ld de, DIREC_SCRATCHPAD ldir
ld bc, SCRATCHPAD_SIZE
ldir
pop hl
; Now, read the value associated to it ; Now, read the value associated to it
call toWord
call readWord call readWord
ld hl, scratchpad ld hl, scratchpad
call parseExpr call parseExpr
@ -102,7 +97,7 @@ getDirectiveID:
pop bc pop bc
ret ret
; Parse directive specified in A (D_* const) with args in (HL) and act in ; Parse directive specified in A (D_* const) with args in I/O and act in
; an appropriate manner. If the directive results in writing data at its ; an appropriate manner. If the directive results in writing data at its
; current location, that data is in (direcData) and A is the number of bytes ; current location, that data is in (direcData) and A is the number of bytes
; in it. ; in it.

View File

@ -708,16 +708,14 @@ getUpcode:
pop ix pop ix
ret ret
; Parse next argument in string (HL) and place it in (DE) ; Parse next argument in I/O and place it in (DE)
; Sets Z on success, reset on error. ; Sets Z on success, reset on error.
processArg: processArg:
call toWord
call readWord call readWord
; Read word is in scratchpad, HL is properly advanced. Now, let's push jr nz, .noarg
; Read word is in (HL). Now, let's push
; that HL value and replace it with (scratchpad) so that we can parse ; that HL value and replace it with (scratchpad) so that we can parse
; that arg. ; that arg.
push hl
ld hl, scratchpad
call parseArg call parseArg
cp 0xff cp 0xff
@ -733,15 +731,18 @@ processArg:
ld a, ixh ld a, ixh
ld (de), a ld (de), a
cp a ; ensure Z is set cp a ; ensure Z is set
jr .end ret
.error: .error:
call JUMP_UNSETZ call JUMP_UNSETZ
.end: ret
pop hl .noarg:
xor a
ld (de), a
ret ret
; Parse instruction specified in A (I_* const) with args in (HL) and write ; Parse instruction specified in A (I_* const) with args in I/O and write
; resulting opcode(s) in (instrUpcode). Returns the number of bytes written in A. ; resulting opcode(s) in (instrUpcode). Returns the number of bytes written in
; A.
parseInstruction: parseInstruction:
push bc push bc
push hl push hl

View File

@ -1,5 +1,3 @@
; *** Consts ***
.equ IO_MAX_LINELEN 0xff
; *** Variables *** ; *** Variables ***
.equ IO_IN_GETC IO_RAMSTART .equ IO_IN_GETC IO_RAMSTART
.equ IO_IN_PUTC IO_IN_GETC+2 .equ IO_IN_PUTC IO_IN_GETC+2
@ -9,14 +7,36 @@
.equ IO_OUT_PUTC IO_OUT_GETC+2 .equ IO_OUT_PUTC IO_OUT_GETC+2
.equ IO_OUT_SEEK IO_OUT_PUTC+2 .equ IO_OUT_SEEK IO_OUT_PUTC+2
.equ IO_OUT_TELL IO_OUT_SEEK+2 .equ IO_OUT_TELL IO_OUT_SEEK+2
.equ IO_LINEBUF IO_OUT_TELL+2 ; see ioPutBack below
.equ IO_RAMEND IO_LINEBUF+IO_MAX_LINELEN+1 .equ IO_PUTBACK_BUF IO_OUT_TELL+2
.equ IO_RAMEND IO_PUTBACK_BUF+1
; *** Code *** ; *** Code ***
ioInit:
xor a
ld (IO_PUTBACK_BUF), a
ret
ioGetC: ioGetC:
ld a, (IO_PUTBACK_BUF)
or a ; cp 0
jr nz, .getback
ld ix, (IO_IN_GETC) ld ix, (IO_IN_GETC)
jp (ix) jp (ix)
.getback:
push af
xor a
ld (IO_PUTBACK_BUF), a
pop af
ret
; Put back non-zero character A into the "ioGetC stack". The next ioGetC call,
; instead of reading from IO_IN_GETC, will return that character. That's the
; easiest way I found to handle the readWord/gotoNextLine problem.
ioPutBack:
ld (IO_PUTBACK_BUF), a
ret
ioPutC: ioPutC:
ld ix, (IO_OUT_PUTC) ld ix, (IO_OUT_PUTC)
@ -26,54 +46,3 @@ ioSeek:
ld ix, (IO_IN_SEEK) ld ix, (IO_IN_SEEK)
jp (ix) jp (ix)
; Sets Z is A is CR, LF, or null.
isLineEnd:
or a ; same as cp 0
ret z
cp 0x0d
ret z
cp 0x0a
ret z
cp '\'
ret
; Read a single line from ioGetCPtr and place it in IO_LINEBUF.
; Returns number of chars read in A. 0 means we're at the end of our input
; stream, which happens when GetC unsets Z. Make HL point to IO_LINEBUF.
; We ignore empty lines and pass through them like butter.
; A null char is written at the end of the line.
ioReadLine:
push bc
; consume ioGetC as long as it yields a line end char.
.loop1:
call ioGetC
jr nz, .eof ; GetC unsets Z? We don't have a line to read,
; we have EOF.
call isLineEnd
jr z, .loop1
; A contains the first char of our line.
ld c, 1
ld (IO_LINEBUF), a
ld hl, IO_LINEBUF+1
.loop2:
call ioGetC
call isLineEnd
jr z, .success ; We have end of line
ld (hl), a
inc hl
inc c
jr .loop2
.success:
; write null char at HL before we return
xor a
ld (hl), a
ld a, c
ld hl, IO_LINEBUF
jr .end
.eof:
xor a
.end:
pop bc
ret

View File

@ -67,6 +67,7 @@ zasmMain:
ld de, IO_OUT_GETC ld de, IO_OUT_GETC
call JUMP_BLKSEL call JUMP_BLKSEL
; Init modules ; Init modules
call ioInit
call symInit call symInit
; First pass ; First pass
@ -105,17 +106,18 @@ zasmParseFile:
ld (ZASM_PC), de ld (ZASM_PC), de
.loop: .loop:
inc de inc de
call ioReadLine
or a ; is A 0?
ret z ; We have EOF
call parseLine call parseLine
ret nz ; error ret nz ; error
ld a, b ; TOK_*
cp TOK_EOF
ret z ; if EOF, return now with success
jr .loop jr .loop
; Parse line in (HL), write the resulting opcode(s) through ioPutC and increases ; Parse next token and accompanying args (when relevant) in I/O, write the
; (ZASM_PC) by the number of bytes written. BC is set to the result of the call ; resulting opcode(s) through ioPutC and increases (ZASM_PC) by the number of
; to tokenize. ; bytes written. BC is set to the result of the call to tokenize.
; Sets Z if parse was successful, unset if there was an error or EOF. ; Sets Z if parse was successful, unset if there was an error. EOF is not an
; error.
parseLine: parseLine:
call tokenize call tokenize
ld a, b ; TOK_* ld a, b ; TOK_*
@ -124,22 +126,11 @@ parseLine:
cp TOK_DIRECTIVE cp TOK_DIRECTIVE
jp z, _parseDirec jp z, _parseDirec
cp TOK_LABEL cp TOK_LABEL
jr z, .label jr z, _parseLabel
cp TOK_EMPTY cp TOK_EOF
ret ; Z is correct. If empty, Z is set and not an ret ; Z is correct. If EOF, Z is set and not an
; error, otherwise, it means bad token and ; error, otherwise, it means bad token and
; errors out. ; errors out.
.label:
push hl
call _parseLabel
pop hl
ret nz ; error out
; We're finished here. However, because it's a label, it's possible that
; another logical line follows directly after the label. Let's parse
; this and propagate error.
call parseLine
; Z has proper value
ret
_parseInstr: _parseInstr:
ld a, c ; I_* ld a, c ; I_*

View File

@ -2,7 +2,7 @@
TOK_INSTR .equ 0x01 TOK_INSTR .equ 0x01
TOK_DIRECTIVE .equ 0x02 TOK_DIRECTIVE .equ 0x02
TOK_LABEL .equ 0x03 TOK_LABEL .equ 0x03
TOK_EMPTY .equ 0xfe ; not a bad token, just an empty line TOK_EOF .equ 0xfe ; end of file
TOK_BAD .equ 0xff TOK_BAD .equ 0xff
.equ SCRATCHPAD_SIZE 0x20 .equ SCRATCHPAD_SIZE 0x20
@ -16,7 +16,17 @@ scratchpad:
isLineEndOrComment: isLineEndOrComment:
cp ';' cp ';'
ret z ret z
or a ; cp 0 ; continue to isLineEnd
; Sets Z is A is CR, LF, or null.
isLineEnd:
or a ; same as cp 0
ret z
cp 0x0d
ret z
cp 0x0a
ret z
cp '\'
ret ret
; Sets Z is A is ' ' '\t' or ',' ; Sets Z is A is ' ' '\t' or ','
@ -32,8 +42,7 @@ isSep:
isSepOrLineEnd: isSepOrLineEnd:
call isSep call isSep
ret z ret z
call isLineEndOrComment jr isLineEndOrComment
ret
; Checks whether string at (HL) is a label, that is, whether it ends with a ":" ; Checks whether string at (HL) is a label, that is, whether it ends with a ":"
; Sets Z if yes, unset if no. ; Sets Z if yes, unset if no.
@ -63,48 +72,98 @@ isLabel:
pop hl pop hl
ret ret
; read word in (HL) and put it in (scratchpad), null terminated, for a maximum ; Read ioGetC until a word starts, then read ioGetC as long as there is no
; of SCRATCHPAD_SIZE-1 characters. As a result, A is the read length. HL is ; separator and put that contents in (scratchpad), null terminated, for a
; advanced to the next separator char. ; maximum of SCRATCHPAD_SIZE-1 characters.
; If EOL (\n, \r or comment) or EOF is hit before we could read a word, we stop
; right there. If scratchpad is not big enough, we stop right there and error.
; HL points to scratchpad
; Sets Z if a word could be read, unsets if not.
readWord: readWord:
push bc push bc
push de ; Get to word
ld de, scratchpad .loop1:
ld b, SCRATCHPAD_SIZE-1 call ioGetC
.loop:
ld a, (hl)
call isSepOrLineEnd
jr z, .success
ld (de), a
inc hl
inc de
djnz .loop
.success:
xor a
ld (de), a
ld a, SCRATCHPAD_SIZE-1
sub a, b
.end:
pop de
pop bc
ret
; (HL) being a string, advance it to the next non-sep character.
; Set Z if we could do it before the line ended, reset Z if we couldn't.
toWord:
.loop:
ld a, (hl)
call isLineEndOrComment call isLineEndOrComment
jr z, .error jr z, .error
call isSep call isSep
jr nz, .success jr nz, .read
jr .loop1
.read:
ld hl, scratchpad
ld b, SCRATCHPAD_SIZE-1
; A contains the first letter to read
.loop2:
ld (hl), a
inc hl inc hl
jr .loop call ioGetC
call isSepOrLineEnd
jr z, .success
djnz .loop2
; out of space. error.
.error: .error:
; We need to put the last char we've read back so that gotoNextLine
; behaves properly.
call ioPutBack
call JUMP_UNSETZ call JUMP_UNSETZ
ret jr .end
.success: .success:
xor a ; ensure Z call ioPutBack
; null-terminate scratchpad
xor a
ld (hl), a
ld hl, scratchpad
.end:
pop bc
ret
; Read ioGetC until we reach the beginning of next line, skipping comments if
; necessary. This skips all whitespace, \n, \r, comments until we reach the
; first non-comment character. Then, we put it back (ioPutBack) and return.
;
; If gotoNextLine encounters anything else than whitespace, comment or line
; separator, we error out (no putback)
; Sets Z if we reached a new line. Unset if EOF or error.
gotoNextLine:
.loop1:
; first loop is "strict", that is: we error out on non-whitespace.
call ioGetC
call isSepOrLineEnd
ret nz ; error
or a ; cp 0
jr z, .eof
call isLineEnd
jr z, .loop3 ; good!
cp ';'
jr z, .loop2 ; comment starting, go to "fast lane"
jr .loop1
.loop2:
; second loop is the "comment loop": anything is valid and we just run
; until EOL.
call ioGetC
or a ; cp 0
jr z, .eof
cp '\' ; special case: '\' doesn't count as a line end
; in a comment.
jr z, .loop2
call isLineEnd
jr z, .loop3
jr .loop2
.loop3:
; Loop 3 happens after we reach our first line sep. This means that we
; wade through whitespace until we reach a non-whitespace character.
call ioGetC
or a ; cp 0
jr z, .eof
cp ';'
jr z, .loop2 ; oh, another comment! go back to loop2!
call isSepOrLineEnd
jr z, .loop3
; Non-whitespace. That's our goal! Put it back
call ioPutBack
.eof:
cp a ; ensure Z
ret ret
; Parse line in (HL) and read the next token in BC. The token is written on ; Parse line in (HL) and read the next token in BC. The token is written on
@ -113,17 +172,27 @@ toWord:
; Advance HL to after the read word. ; Advance HL to after the read word.
; If no token matches, TOK_BAD is written to B ; If no token matches, TOK_BAD is written to B
tokenize: tokenize:
call toWord
jr nz, .emptyline
call readWord call readWord
push hl ; Save advanced HL for later jr z, .process ; read successful, process into token.
ld hl, scratchpad ; Error. It could be EOL, EOF or scraptchpad size problem
; Whatever it is, calling gotoNextLine is appropriate. If it's EOL
; that's obviously what we want to do. If it's EOF, we can check
; it after. If it's a scratchpad overrun, gotoNextLine handles it.
call gotoNextLine
jr nz, .error
or a ; Are we EOF?
jr nz, tokenize ; not EOF? then continue!
; We're EOF
ld b, TOK_EOF
ret
.process:
call isLabel call isLabel
jr z, .label jr z, .label
call getInstID call getInstID
jr z, .instr jr z, .instr
call getDirectiveID call getDirectiveID
jr z, .direc jr z, .direc
.error:
; no match ; no match
ld b, TOK_BAD ld b, TOK_BAD
jr .end jr .end
@ -137,9 +206,4 @@ tokenize:
ld b, TOK_LABEL ld b, TOK_LABEL
.end: .end:
ld c, a ld c, a
pop hl
ret
.emptyline:
ld b, TOK_EMPTY
; no HL to pop, we jumped before the push
ret ret