zasm: big I/O overhaul
Instead of buffering input in memory one line at a time, we go in "just in time" mode and always read contents directly from I/O, without buffering. It forces us to implement a `ioPutback` scheme, but on the other hand it greatly simplifies cases where multiple tokens are on the same line (when a label is directly followed by an instruction). The end result feels much more solid and less hackish.
This commit is contained in:
parent
34ff0a6c2a
commit
e9244b80ee
@ -24,7 +24,6 @@ directiveHandlers:
|
||||
|
||||
handleDB:
|
||||
push hl
|
||||
call toWord
|
||||
call readWord
|
||||
ld hl, scratchpad
|
||||
call parseLiteral
|
||||
@ -36,7 +35,6 @@ handleDB:
|
||||
|
||||
handleDW:
|
||||
push hl
|
||||
call toWord
|
||||
call readWord
|
||||
ld hl, scratchpad
|
||||
call parseExpr
|
||||
@ -51,7 +49,9 @@ handleDW:
|
||||
handleEQU:
|
||||
call zasmIsFirstPass
|
||||
jr nz, .begin
|
||||
; first pass? .equ are noops
|
||||
; first pass? .equ are noops Consume args and return
|
||||
call readWord
|
||||
call readWord
|
||||
xor a
|
||||
ret
|
||||
.begin:
|
||||
@ -59,19 +59,14 @@ handleEQU:
|
||||
push de
|
||||
push bc
|
||||
; Read our constant name
|
||||
call toWord
|
||||
call readWord
|
||||
; We can't register our symbol yet: we don't have our value!
|
||||
; Let's copy it over.
|
||||
push hl
|
||||
ld hl, scratchpad
|
||||
ld de, DIREC_SCRATCHPAD
|
||||
ld bc, SCRATCHPAD_SIZE
|
||||
ldir
|
||||
pop hl
|
||||
|
||||
; Now, read the value associated to it
|
||||
call toWord
|
||||
call readWord
|
||||
ld hl, scratchpad
|
||||
call parseExpr
|
||||
@ -102,7 +97,7 @@ getDirectiveID:
|
||||
pop bc
|
||||
ret
|
||||
|
||||
; Parse directive specified in A (D_* const) with args in (HL) and act in
|
||||
; Parse directive specified in A (D_* const) with args in I/O and act in
|
||||
; an appropriate manner. If the directive results in writing data at its
|
||||
; current location, that data is in (direcData) and A is the number of bytes
|
||||
; in it.
|
||||
|
@ -708,16 +708,14 @@ getUpcode:
|
||||
pop ix
|
||||
ret
|
||||
|
||||
; Parse next argument in string (HL) and place it in (DE)
|
||||
; Parse next argument in I/O and place it in (DE)
|
||||
; Sets Z on success, reset on error.
|
||||
processArg:
|
||||
call toWord
|
||||
call readWord
|
||||
; Read word is in scratchpad, HL is properly advanced. Now, let's push
|
||||
jr nz, .noarg
|
||||
; Read word is in (HL). Now, let's push
|
||||
; that HL value and replace it with (scratchpad) so that we can parse
|
||||
; that arg.
|
||||
push hl
|
||||
ld hl, scratchpad
|
||||
|
||||
call parseArg
|
||||
cp 0xff
|
||||
@ -733,15 +731,18 @@ processArg:
|
||||
ld a, ixh
|
||||
ld (de), a
|
||||
cp a ; ensure Z is set
|
||||
jr .end
|
||||
ret
|
||||
.error:
|
||||
call JUMP_UNSETZ
|
||||
.end:
|
||||
pop hl
|
||||
ret
|
||||
.noarg:
|
||||
xor a
|
||||
ld (de), a
|
||||
ret
|
||||
|
||||
; Parse instruction specified in A (I_* const) with args in (HL) and write
|
||||
; resulting opcode(s) in (instrUpcode). Returns the number of bytes written in A.
|
||||
; Parse instruction specified in A (I_* const) with args in I/O and write
|
||||
; resulting opcode(s) in (instrUpcode). Returns the number of bytes written in
|
||||
; A.
|
||||
parseInstruction:
|
||||
push bc
|
||||
push hl
|
||||
|
@ -1,5 +1,3 @@
|
||||
; *** Consts ***
|
||||
.equ IO_MAX_LINELEN 0xff
|
||||
; *** Variables ***
|
||||
.equ IO_IN_GETC IO_RAMSTART
|
||||
.equ IO_IN_PUTC IO_IN_GETC+2
|
||||
@ -9,14 +7,36 @@
|
||||
.equ IO_OUT_PUTC IO_OUT_GETC+2
|
||||
.equ IO_OUT_SEEK IO_OUT_PUTC+2
|
||||
.equ IO_OUT_TELL IO_OUT_SEEK+2
|
||||
.equ IO_LINEBUF IO_OUT_TELL+2
|
||||
.equ IO_RAMEND IO_LINEBUF+IO_MAX_LINELEN+1
|
||||
; see ioPutBack below
|
||||
.equ IO_PUTBACK_BUF IO_OUT_TELL+2
|
||||
.equ IO_RAMEND IO_PUTBACK_BUF+1
|
||||
|
||||
; *** Code ***
|
||||
|
||||
ioInit:
|
||||
xor a
|
||||
ld (IO_PUTBACK_BUF), a
|
||||
ret
|
||||
|
||||
ioGetC:
|
||||
ld a, (IO_PUTBACK_BUF)
|
||||
or a ; cp 0
|
||||
jr nz, .getback
|
||||
ld ix, (IO_IN_GETC)
|
||||
jp (ix)
|
||||
.getback:
|
||||
push af
|
||||
xor a
|
||||
ld (IO_PUTBACK_BUF), a
|
||||
pop af
|
||||
ret
|
||||
|
||||
; Put back non-zero character A into the "ioGetC stack". The next ioGetC call,
|
||||
; instead of reading from IO_IN_GETC, will return that character. That's the
|
||||
; easiest way I found to handle the readWord/gotoNextLine problem.
|
||||
ioPutBack:
|
||||
ld (IO_PUTBACK_BUF), a
|
||||
ret
|
||||
|
||||
ioPutC:
|
||||
ld ix, (IO_OUT_PUTC)
|
||||
@ -26,54 +46,3 @@ ioSeek:
|
||||
ld ix, (IO_IN_SEEK)
|
||||
jp (ix)
|
||||
|
||||
; Sets Z is A is CR, LF, or null.
|
||||
isLineEnd:
|
||||
or a ; same as cp 0
|
||||
ret z
|
||||
cp 0x0d
|
||||
ret z
|
||||
cp 0x0a
|
||||
ret z
|
||||
cp '\'
|
||||
ret
|
||||
|
||||
; Read a single line from ioGetCPtr and place it in IO_LINEBUF.
|
||||
; Returns number of chars read in A. 0 means we're at the end of our input
|
||||
; stream, which happens when GetC unsets Z. Make HL point to IO_LINEBUF.
|
||||
; We ignore empty lines and pass through them like butter.
|
||||
; A null char is written at the end of the line.
|
||||
ioReadLine:
|
||||
push bc
|
||||
; consume ioGetC as long as it yields a line end char.
|
||||
.loop1:
|
||||
call ioGetC
|
||||
jr nz, .eof ; GetC unsets Z? We don't have a line to read,
|
||||
; we have EOF.
|
||||
call isLineEnd
|
||||
jr z, .loop1
|
||||
; A contains the first char of our line.
|
||||
ld c, 1
|
||||
ld (IO_LINEBUF), a
|
||||
ld hl, IO_LINEBUF+1
|
||||
.loop2:
|
||||
call ioGetC
|
||||
call isLineEnd
|
||||
jr z, .success ; We have end of line
|
||||
ld (hl), a
|
||||
inc hl
|
||||
inc c
|
||||
jr .loop2
|
||||
|
||||
.success:
|
||||
; write null char at HL before we return
|
||||
xor a
|
||||
ld (hl), a
|
||||
ld a, c
|
||||
ld hl, IO_LINEBUF
|
||||
jr .end
|
||||
.eof:
|
||||
xor a
|
||||
.end:
|
||||
pop bc
|
||||
ret
|
||||
|
||||
|
@ -67,6 +67,7 @@ zasmMain:
|
||||
ld de, IO_OUT_GETC
|
||||
call JUMP_BLKSEL
|
||||
; Init modules
|
||||
call ioInit
|
||||
call symInit
|
||||
|
||||
; First pass
|
||||
@ -105,17 +106,18 @@ zasmParseFile:
|
||||
ld (ZASM_PC), de
|
||||
.loop:
|
||||
inc de
|
||||
call ioReadLine
|
||||
or a ; is A 0?
|
||||
ret z ; We have EOF
|
||||
call parseLine
|
||||
ret nz ; error
|
||||
ld a, b ; TOK_*
|
||||
cp TOK_EOF
|
||||
ret z ; if EOF, return now with success
|
||||
jr .loop
|
||||
|
||||
; Parse line in (HL), write the resulting opcode(s) through ioPutC and increases
|
||||
; (ZASM_PC) by the number of bytes written. BC is set to the result of the call
|
||||
; to tokenize.
|
||||
; Sets Z if parse was successful, unset if there was an error or EOF.
|
||||
; Parse next token and accompanying args (when relevant) in I/O, write the
|
||||
; resulting opcode(s) through ioPutC and increases (ZASM_PC) by the number of
|
||||
; bytes written. BC is set to the result of the call to tokenize.
|
||||
; Sets Z if parse was successful, unset if there was an error. EOF is not an
|
||||
; error.
|
||||
parseLine:
|
||||
call tokenize
|
||||
ld a, b ; TOK_*
|
||||
@ -124,22 +126,11 @@ parseLine:
|
||||
cp TOK_DIRECTIVE
|
||||
jp z, _parseDirec
|
||||
cp TOK_LABEL
|
||||
jr z, .label
|
||||
cp TOK_EMPTY
|
||||
ret ; Z is correct. If empty, Z is set and not an
|
||||
jr z, _parseLabel
|
||||
cp TOK_EOF
|
||||
ret ; Z is correct. If EOF, Z is set and not an
|
||||
; error, otherwise, it means bad token and
|
||||
; errors out.
|
||||
.label:
|
||||
push hl
|
||||
call _parseLabel
|
||||
pop hl
|
||||
ret nz ; error out
|
||||
; We're finished here. However, because it's a label, it's possible that
|
||||
; another logical line follows directly after the label. Let's parse
|
||||
; this and propagate error.
|
||||
call parseLine
|
||||
; Z has proper value
|
||||
ret
|
||||
|
||||
_parseInstr:
|
||||
ld a, c ; I_*
|
||||
|
@ -2,7 +2,7 @@
|
||||
TOK_INSTR .equ 0x01
|
||||
TOK_DIRECTIVE .equ 0x02
|
||||
TOK_LABEL .equ 0x03
|
||||
TOK_EMPTY .equ 0xfe ; not a bad token, just an empty line
|
||||
TOK_EOF .equ 0xfe ; end of file
|
||||
TOK_BAD .equ 0xff
|
||||
|
||||
.equ SCRATCHPAD_SIZE 0x20
|
||||
@ -16,7 +16,17 @@ scratchpad:
|
||||
isLineEndOrComment:
|
||||
cp ';'
|
||||
ret z
|
||||
or a ; cp 0
|
||||
; continue to isLineEnd
|
||||
|
||||
; Sets Z is A is CR, LF, or null.
|
||||
isLineEnd:
|
||||
or a ; same as cp 0
|
||||
ret z
|
||||
cp 0x0d
|
||||
ret z
|
||||
cp 0x0a
|
||||
ret z
|
||||
cp '\'
|
||||
ret
|
||||
|
||||
; Sets Z is A is ' ' '\t' or ','
|
||||
@ -32,8 +42,7 @@ isSep:
|
||||
isSepOrLineEnd:
|
||||
call isSep
|
||||
ret z
|
||||
call isLineEndOrComment
|
||||
ret
|
||||
jr isLineEndOrComment
|
||||
|
||||
; Checks whether string at (HL) is a label, that is, whether it ends with a ":"
|
||||
; Sets Z if yes, unset if no.
|
||||
@ -63,48 +72,98 @@ isLabel:
|
||||
pop hl
|
||||
ret
|
||||
|
||||
; read word in (HL) and put it in (scratchpad), null terminated, for a maximum
|
||||
; of SCRATCHPAD_SIZE-1 characters. As a result, A is the read length. HL is
|
||||
; advanced to the next separator char.
|
||||
; Read ioGetC until a word starts, then read ioGetC as long as there is no
|
||||
; separator and put that contents in (scratchpad), null terminated, for a
|
||||
; maximum of SCRATCHPAD_SIZE-1 characters.
|
||||
; If EOL (\n, \r or comment) or EOF is hit before we could read a word, we stop
|
||||
; right there. If scratchpad is not big enough, we stop right there and error.
|
||||
; HL points to scratchpad
|
||||
; Sets Z if a word could be read, unsets if not.
|
||||
readWord:
|
||||
push bc
|
||||
push de
|
||||
ld de, scratchpad
|
||||
ld b, SCRATCHPAD_SIZE-1
|
||||
.loop:
|
||||
ld a, (hl)
|
||||
call isSepOrLineEnd
|
||||
jr z, .success
|
||||
ld (de), a
|
||||
inc hl
|
||||
inc de
|
||||
djnz .loop
|
||||
.success:
|
||||
xor a
|
||||
ld (de), a
|
||||
ld a, SCRATCHPAD_SIZE-1
|
||||
sub a, b
|
||||
.end:
|
||||
pop de
|
||||
pop bc
|
||||
ret
|
||||
|
||||
; (HL) being a string, advance it to the next non-sep character.
|
||||
; Set Z if we could do it before the line ended, reset Z if we couldn't.
|
||||
toWord:
|
||||
.loop:
|
||||
ld a, (hl)
|
||||
; Get to word
|
||||
.loop1:
|
||||
call ioGetC
|
||||
call isLineEndOrComment
|
||||
jr z, .error
|
||||
call isSep
|
||||
jr nz, .success
|
||||
jr nz, .read
|
||||
jr .loop1
|
||||
.read:
|
||||
ld hl, scratchpad
|
||||
ld b, SCRATCHPAD_SIZE-1
|
||||
; A contains the first letter to read
|
||||
.loop2:
|
||||
ld (hl), a
|
||||
inc hl
|
||||
jr .loop
|
||||
call ioGetC
|
||||
call isSepOrLineEnd
|
||||
jr z, .success
|
||||
djnz .loop2
|
||||
; out of space. error.
|
||||
.error:
|
||||
; We need to put the last char we've read back so that gotoNextLine
|
||||
; behaves properly.
|
||||
call ioPutBack
|
||||
call JUMP_UNSETZ
|
||||
ret
|
||||
jr .end
|
||||
.success:
|
||||
xor a ; ensure Z
|
||||
call ioPutBack
|
||||
; null-terminate scratchpad
|
||||
xor a
|
||||
ld (hl), a
|
||||
ld hl, scratchpad
|
||||
.end:
|
||||
pop bc
|
||||
ret
|
||||
|
||||
; Read ioGetC until we reach the beginning of next line, skipping comments if
|
||||
; necessary. This skips all whitespace, \n, \r, comments until we reach the
|
||||
; first non-comment character. Then, we put it back (ioPutBack) and return.
|
||||
;
|
||||
; If gotoNextLine encounters anything else than whitespace, comment or line
|
||||
; separator, we error out (no putback)
|
||||
|
||||
; Sets Z if we reached a new line. Unset if EOF or error.
|
||||
gotoNextLine:
|
||||
.loop1:
|
||||
; first loop is "strict", that is: we error out on non-whitespace.
|
||||
call ioGetC
|
||||
call isSepOrLineEnd
|
||||
ret nz ; error
|
||||
or a ; cp 0
|
||||
jr z, .eof
|
||||
call isLineEnd
|
||||
jr z, .loop3 ; good!
|
||||
cp ';'
|
||||
jr z, .loop2 ; comment starting, go to "fast lane"
|
||||
jr .loop1
|
||||
.loop2:
|
||||
; second loop is the "comment loop": anything is valid and we just run
|
||||
; until EOL.
|
||||
call ioGetC
|
||||
or a ; cp 0
|
||||
jr z, .eof
|
||||
cp '\' ; special case: '\' doesn't count as a line end
|
||||
; in a comment.
|
||||
jr z, .loop2
|
||||
call isLineEnd
|
||||
jr z, .loop3
|
||||
jr .loop2
|
||||
.loop3:
|
||||
; Loop 3 happens after we reach our first line sep. This means that we
|
||||
; wade through whitespace until we reach a non-whitespace character.
|
||||
call ioGetC
|
||||
or a ; cp 0
|
||||
jr z, .eof
|
||||
cp ';'
|
||||
jr z, .loop2 ; oh, another comment! go back to loop2!
|
||||
call isSepOrLineEnd
|
||||
jr z, .loop3
|
||||
; Non-whitespace. That's our goal! Put it back
|
||||
call ioPutBack
|
||||
.eof:
|
||||
cp a ; ensure Z
|
||||
ret
|
||||
|
||||
; Parse line in (HL) and read the next token in BC. The token is written on
|
||||
@ -113,17 +172,27 @@ toWord:
|
||||
; Advance HL to after the read word.
|
||||
; If no token matches, TOK_BAD is written to B
|
||||
tokenize:
|
||||
call toWord
|
||||
jr nz, .emptyline
|
||||
call readWord
|
||||
push hl ; Save advanced HL for later
|
||||
ld hl, scratchpad
|
||||
jr z, .process ; read successful, process into token.
|
||||
; Error. It could be EOL, EOF or scraptchpad size problem
|
||||
; Whatever it is, calling gotoNextLine is appropriate. If it's EOL
|
||||
; that's obviously what we want to do. If it's EOF, we can check
|
||||
; it after. If it's a scratchpad overrun, gotoNextLine handles it.
|
||||
call gotoNextLine
|
||||
jr nz, .error
|
||||
or a ; Are we EOF?
|
||||
jr nz, tokenize ; not EOF? then continue!
|
||||
; We're EOF
|
||||
ld b, TOK_EOF
|
||||
ret
|
||||
.process:
|
||||
call isLabel
|
||||
jr z, .label
|
||||
call getInstID
|
||||
jr z, .instr
|
||||
call getDirectiveID
|
||||
jr z, .direc
|
||||
.error:
|
||||
; no match
|
||||
ld b, TOK_BAD
|
||||
jr .end
|
||||
@ -137,9 +206,4 @@ tokenize:
|
||||
ld b, TOK_LABEL
|
||||
.end:
|
||||
ld c, a
|
||||
pop hl
|
||||
ret
|
||||
.emptyline:
|
||||
ld b, TOK_EMPTY
|
||||
; no HL to pop, we jumped before the push
|
||||
ret
|
||||
|
Loading…
Reference in New Issue
Block a user