zasm: big I/O overhaul
Instead of buffering input in memory one line at a time, we go in "just in time" mode and always read contents directly from I/O, without buffering. It forces us to implement a `ioPutback` scheme, but on the other hand it greatly simplifies cases where multiple tokens are on the same line (when a label is directly followed by an instruction). The end result feels much more solid and less hackish.
This commit is contained in:
parent
34ff0a6c2a
commit
e9244b80ee
@ -24,7 +24,6 @@ directiveHandlers:
|
|||||||
|
|
||||||
handleDB:
|
handleDB:
|
||||||
push hl
|
push hl
|
||||||
call toWord
|
|
||||||
call readWord
|
call readWord
|
||||||
ld hl, scratchpad
|
ld hl, scratchpad
|
||||||
call parseLiteral
|
call parseLiteral
|
||||||
@ -36,7 +35,6 @@ handleDB:
|
|||||||
|
|
||||||
handleDW:
|
handleDW:
|
||||||
push hl
|
push hl
|
||||||
call toWord
|
|
||||||
call readWord
|
call readWord
|
||||||
ld hl, scratchpad
|
ld hl, scratchpad
|
||||||
call parseExpr
|
call parseExpr
|
||||||
@ -51,7 +49,9 @@ handleDW:
|
|||||||
handleEQU:
|
handleEQU:
|
||||||
call zasmIsFirstPass
|
call zasmIsFirstPass
|
||||||
jr nz, .begin
|
jr nz, .begin
|
||||||
; first pass? .equ are noops
|
; first pass? .equ are noops Consume args and return
|
||||||
|
call readWord
|
||||||
|
call readWord
|
||||||
xor a
|
xor a
|
||||||
ret
|
ret
|
||||||
.begin:
|
.begin:
|
||||||
@ -59,19 +59,14 @@ handleEQU:
|
|||||||
push de
|
push de
|
||||||
push bc
|
push bc
|
||||||
; Read our constant name
|
; Read our constant name
|
||||||
call toWord
|
|
||||||
call readWord
|
call readWord
|
||||||
; We can't register our symbol yet: we don't have our value!
|
; We can't register our symbol yet: we don't have our value!
|
||||||
; Let's copy it over.
|
; Let's copy it over.
|
||||||
push hl
|
ld de, DIREC_SCRATCHPAD
|
||||||
ld hl, scratchpad
|
ld bc, SCRATCHPAD_SIZE
|
||||||
ld de, DIREC_SCRATCHPAD
|
ldir
|
||||||
ld bc, SCRATCHPAD_SIZE
|
|
||||||
ldir
|
|
||||||
pop hl
|
|
||||||
|
|
||||||
; Now, read the value associated to it
|
; Now, read the value associated to it
|
||||||
call toWord
|
|
||||||
call readWord
|
call readWord
|
||||||
ld hl, scratchpad
|
ld hl, scratchpad
|
||||||
call parseExpr
|
call parseExpr
|
||||||
@ -102,7 +97,7 @@ getDirectiveID:
|
|||||||
pop bc
|
pop bc
|
||||||
ret
|
ret
|
||||||
|
|
||||||
; Parse directive specified in A (D_* const) with args in (HL) and act in
|
; Parse directive specified in A (D_* const) with args in I/O and act in
|
||||||
; an appropriate manner. If the directive results in writing data at its
|
; an appropriate manner. If the directive results in writing data at its
|
||||||
; current location, that data is in (direcData) and A is the number of bytes
|
; current location, that data is in (direcData) and A is the number of bytes
|
||||||
; in it.
|
; in it.
|
||||||
|
@ -708,16 +708,14 @@ getUpcode:
|
|||||||
pop ix
|
pop ix
|
||||||
ret
|
ret
|
||||||
|
|
||||||
; Parse next argument in string (HL) and place it in (DE)
|
; Parse next argument in I/O and place it in (DE)
|
||||||
; Sets Z on success, reset on error.
|
; Sets Z on success, reset on error.
|
||||||
processArg:
|
processArg:
|
||||||
call toWord
|
|
||||||
call readWord
|
call readWord
|
||||||
; Read word is in scratchpad, HL is properly advanced. Now, let's push
|
jr nz, .noarg
|
||||||
|
; Read word is in (HL). Now, let's push
|
||||||
; that HL value and replace it with (scratchpad) so that we can parse
|
; that HL value and replace it with (scratchpad) so that we can parse
|
||||||
; that arg.
|
; that arg.
|
||||||
push hl
|
|
||||||
ld hl, scratchpad
|
|
||||||
|
|
||||||
call parseArg
|
call parseArg
|
||||||
cp 0xff
|
cp 0xff
|
||||||
@ -733,15 +731,18 @@ processArg:
|
|||||||
ld a, ixh
|
ld a, ixh
|
||||||
ld (de), a
|
ld (de), a
|
||||||
cp a ; ensure Z is set
|
cp a ; ensure Z is set
|
||||||
jr .end
|
ret
|
||||||
.error:
|
.error:
|
||||||
call JUMP_UNSETZ
|
call JUMP_UNSETZ
|
||||||
.end:
|
ret
|
||||||
pop hl
|
.noarg:
|
||||||
|
xor a
|
||||||
|
ld (de), a
|
||||||
ret
|
ret
|
||||||
|
|
||||||
; Parse instruction specified in A (I_* const) with args in (HL) and write
|
; Parse instruction specified in A (I_* const) with args in I/O and write
|
||||||
; resulting opcode(s) in (instrUpcode). Returns the number of bytes written in A.
|
; resulting opcode(s) in (instrUpcode). Returns the number of bytes written in
|
||||||
|
; A.
|
||||||
parseInstruction:
|
parseInstruction:
|
||||||
push bc
|
push bc
|
||||||
push hl
|
push hl
|
||||||
|
@ -1,5 +1,3 @@
|
|||||||
; *** Consts ***
|
|
||||||
.equ IO_MAX_LINELEN 0xff
|
|
||||||
; *** Variables ***
|
; *** Variables ***
|
||||||
.equ IO_IN_GETC IO_RAMSTART
|
.equ IO_IN_GETC IO_RAMSTART
|
||||||
.equ IO_IN_PUTC IO_IN_GETC+2
|
.equ IO_IN_PUTC IO_IN_GETC+2
|
||||||
@ -9,14 +7,36 @@
|
|||||||
.equ IO_OUT_PUTC IO_OUT_GETC+2
|
.equ IO_OUT_PUTC IO_OUT_GETC+2
|
||||||
.equ IO_OUT_SEEK IO_OUT_PUTC+2
|
.equ IO_OUT_SEEK IO_OUT_PUTC+2
|
||||||
.equ IO_OUT_TELL IO_OUT_SEEK+2
|
.equ IO_OUT_TELL IO_OUT_SEEK+2
|
||||||
.equ IO_LINEBUF IO_OUT_TELL+2
|
; see ioPutBack below
|
||||||
.equ IO_RAMEND IO_LINEBUF+IO_MAX_LINELEN+1
|
.equ IO_PUTBACK_BUF IO_OUT_TELL+2
|
||||||
|
.equ IO_RAMEND IO_PUTBACK_BUF+1
|
||||||
|
|
||||||
; *** Code ***
|
; *** Code ***
|
||||||
|
|
||||||
|
ioInit:
|
||||||
|
xor a
|
||||||
|
ld (IO_PUTBACK_BUF), a
|
||||||
|
ret
|
||||||
|
|
||||||
ioGetC:
|
ioGetC:
|
||||||
|
ld a, (IO_PUTBACK_BUF)
|
||||||
|
or a ; cp 0
|
||||||
|
jr nz, .getback
|
||||||
ld ix, (IO_IN_GETC)
|
ld ix, (IO_IN_GETC)
|
||||||
jp (ix)
|
jp (ix)
|
||||||
|
.getback:
|
||||||
|
push af
|
||||||
|
xor a
|
||||||
|
ld (IO_PUTBACK_BUF), a
|
||||||
|
pop af
|
||||||
|
ret
|
||||||
|
|
||||||
|
; Put back non-zero character A into the "ioGetC stack". The next ioGetC call,
|
||||||
|
; instead of reading from IO_IN_GETC, will return that character. That's the
|
||||||
|
; easiest way I found to handle the readWord/gotoNextLine problem.
|
||||||
|
ioPutBack:
|
||||||
|
ld (IO_PUTBACK_BUF), a
|
||||||
|
ret
|
||||||
|
|
||||||
ioPutC:
|
ioPutC:
|
||||||
ld ix, (IO_OUT_PUTC)
|
ld ix, (IO_OUT_PUTC)
|
||||||
@ -26,54 +46,3 @@ ioSeek:
|
|||||||
ld ix, (IO_IN_SEEK)
|
ld ix, (IO_IN_SEEK)
|
||||||
jp (ix)
|
jp (ix)
|
||||||
|
|
||||||
; Sets Z is A is CR, LF, or null.
|
|
||||||
isLineEnd:
|
|
||||||
or a ; same as cp 0
|
|
||||||
ret z
|
|
||||||
cp 0x0d
|
|
||||||
ret z
|
|
||||||
cp 0x0a
|
|
||||||
ret z
|
|
||||||
cp '\'
|
|
||||||
ret
|
|
||||||
|
|
||||||
; Read a single line from ioGetCPtr and place it in IO_LINEBUF.
|
|
||||||
; Returns number of chars read in A. 0 means we're at the end of our input
|
|
||||||
; stream, which happens when GetC unsets Z. Make HL point to IO_LINEBUF.
|
|
||||||
; We ignore empty lines and pass through them like butter.
|
|
||||||
; A null char is written at the end of the line.
|
|
||||||
ioReadLine:
|
|
||||||
push bc
|
|
||||||
; consume ioGetC as long as it yields a line end char.
|
|
||||||
.loop1:
|
|
||||||
call ioGetC
|
|
||||||
jr nz, .eof ; GetC unsets Z? We don't have a line to read,
|
|
||||||
; we have EOF.
|
|
||||||
call isLineEnd
|
|
||||||
jr z, .loop1
|
|
||||||
; A contains the first char of our line.
|
|
||||||
ld c, 1
|
|
||||||
ld (IO_LINEBUF), a
|
|
||||||
ld hl, IO_LINEBUF+1
|
|
||||||
.loop2:
|
|
||||||
call ioGetC
|
|
||||||
call isLineEnd
|
|
||||||
jr z, .success ; We have end of line
|
|
||||||
ld (hl), a
|
|
||||||
inc hl
|
|
||||||
inc c
|
|
||||||
jr .loop2
|
|
||||||
|
|
||||||
.success:
|
|
||||||
; write null char at HL before we return
|
|
||||||
xor a
|
|
||||||
ld (hl), a
|
|
||||||
ld a, c
|
|
||||||
ld hl, IO_LINEBUF
|
|
||||||
jr .end
|
|
||||||
.eof:
|
|
||||||
xor a
|
|
||||||
.end:
|
|
||||||
pop bc
|
|
||||||
ret
|
|
||||||
|
|
||||||
|
@ -67,6 +67,7 @@ zasmMain:
|
|||||||
ld de, IO_OUT_GETC
|
ld de, IO_OUT_GETC
|
||||||
call JUMP_BLKSEL
|
call JUMP_BLKSEL
|
||||||
; Init modules
|
; Init modules
|
||||||
|
call ioInit
|
||||||
call symInit
|
call symInit
|
||||||
|
|
||||||
; First pass
|
; First pass
|
||||||
@ -105,17 +106,18 @@ zasmParseFile:
|
|||||||
ld (ZASM_PC), de
|
ld (ZASM_PC), de
|
||||||
.loop:
|
.loop:
|
||||||
inc de
|
inc de
|
||||||
call ioReadLine
|
|
||||||
or a ; is A 0?
|
|
||||||
ret z ; We have EOF
|
|
||||||
call parseLine
|
call parseLine
|
||||||
ret nz ; error
|
ret nz ; error
|
||||||
|
ld a, b ; TOK_*
|
||||||
|
cp TOK_EOF
|
||||||
|
ret z ; if EOF, return now with success
|
||||||
jr .loop
|
jr .loop
|
||||||
|
|
||||||
; Parse line in (HL), write the resulting opcode(s) through ioPutC and increases
|
; Parse next token and accompanying args (when relevant) in I/O, write the
|
||||||
; (ZASM_PC) by the number of bytes written. BC is set to the result of the call
|
; resulting opcode(s) through ioPutC and increases (ZASM_PC) by the number of
|
||||||
; to tokenize.
|
; bytes written. BC is set to the result of the call to tokenize.
|
||||||
; Sets Z if parse was successful, unset if there was an error or EOF.
|
; Sets Z if parse was successful, unset if there was an error. EOF is not an
|
||||||
|
; error.
|
||||||
parseLine:
|
parseLine:
|
||||||
call tokenize
|
call tokenize
|
||||||
ld a, b ; TOK_*
|
ld a, b ; TOK_*
|
||||||
@ -124,22 +126,11 @@ parseLine:
|
|||||||
cp TOK_DIRECTIVE
|
cp TOK_DIRECTIVE
|
||||||
jp z, _parseDirec
|
jp z, _parseDirec
|
||||||
cp TOK_LABEL
|
cp TOK_LABEL
|
||||||
jr z, .label
|
jr z, _parseLabel
|
||||||
cp TOK_EMPTY
|
cp TOK_EOF
|
||||||
ret ; Z is correct. If empty, Z is set and not an
|
ret ; Z is correct. If EOF, Z is set and not an
|
||||||
; error, otherwise, it means bad token and
|
; error, otherwise, it means bad token and
|
||||||
; errors out.
|
; errors out.
|
||||||
.label:
|
|
||||||
push hl
|
|
||||||
call _parseLabel
|
|
||||||
pop hl
|
|
||||||
ret nz ; error out
|
|
||||||
; We're finished here. However, because it's a label, it's possible that
|
|
||||||
; another logical line follows directly after the label. Let's parse
|
|
||||||
; this and propagate error.
|
|
||||||
call parseLine
|
|
||||||
; Z has proper value
|
|
||||||
ret
|
|
||||||
|
|
||||||
_parseInstr:
|
_parseInstr:
|
||||||
ld a, c ; I_*
|
ld a, c ; I_*
|
||||||
|
@ -2,7 +2,7 @@
|
|||||||
TOK_INSTR .equ 0x01
|
TOK_INSTR .equ 0x01
|
||||||
TOK_DIRECTIVE .equ 0x02
|
TOK_DIRECTIVE .equ 0x02
|
||||||
TOK_LABEL .equ 0x03
|
TOK_LABEL .equ 0x03
|
||||||
TOK_EMPTY .equ 0xfe ; not a bad token, just an empty line
|
TOK_EOF .equ 0xfe ; end of file
|
||||||
TOK_BAD .equ 0xff
|
TOK_BAD .equ 0xff
|
||||||
|
|
||||||
.equ SCRATCHPAD_SIZE 0x20
|
.equ SCRATCHPAD_SIZE 0x20
|
||||||
@ -16,7 +16,17 @@ scratchpad:
|
|||||||
isLineEndOrComment:
|
isLineEndOrComment:
|
||||||
cp ';'
|
cp ';'
|
||||||
ret z
|
ret z
|
||||||
or a ; cp 0
|
; continue to isLineEnd
|
||||||
|
|
||||||
|
; Sets Z is A is CR, LF, or null.
|
||||||
|
isLineEnd:
|
||||||
|
or a ; same as cp 0
|
||||||
|
ret z
|
||||||
|
cp 0x0d
|
||||||
|
ret z
|
||||||
|
cp 0x0a
|
||||||
|
ret z
|
||||||
|
cp '\'
|
||||||
ret
|
ret
|
||||||
|
|
||||||
; Sets Z is A is ' ' '\t' or ','
|
; Sets Z is A is ' ' '\t' or ','
|
||||||
@ -32,8 +42,7 @@ isSep:
|
|||||||
isSepOrLineEnd:
|
isSepOrLineEnd:
|
||||||
call isSep
|
call isSep
|
||||||
ret z
|
ret z
|
||||||
call isLineEndOrComment
|
jr isLineEndOrComment
|
||||||
ret
|
|
||||||
|
|
||||||
; Checks whether string at (HL) is a label, that is, whether it ends with a ":"
|
; Checks whether string at (HL) is a label, that is, whether it ends with a ":"
|
||||||
; Sets Z if yes, unset if no.
|
; Sets Z if yes, unset if no.
|
||||||
@ -63,48 +72,98 @@ isLabel:
|
|||||||
pop hl
|
pop hl
|
||||||
ret
|
ret
|
||||||
|
|
||||||
; read word in (HL) and put it in (scratchpad), null terminated, for a maximum
|
; Read ioGetC until a word starts, then read ioGetC as long as there is no
|
||||||
; of SCRATCHPAD_SIZE-1 characters. As a result, A is the read length. HL is
|
; separator and put that contents in (scratchpad), null terminated, for a
|
||||||
; advanced to the next separator char.
|
; maximum of SCRATCHPAD_SIZE-1 characters.
|
||||||
|
; If EOL (\n, \r or comment) or EOF is hit before we could read a word, we stop
|
||||||
|
; right there. If scratchpad is not big enough, we stop right there and error.
|
||||||
|
; HL points to scratchpad
|
||||||
|
; Sets Z if a word could be read, unsets if not.
|
||||||
readWord:
|
readWord:
|
||||||
push bc
|
push bc
|
||||||
push de
|
; Get to word
|
||||||
ld de, scratchpad
|
.loop1:
|
||||||
ld b, SCRATCHPAD_SIZE-1
|
call ioGetC
|
||||||
.loop:
|
|
||||||
ld a, (hl)
|
|
||||||
call isSepOrLineEnd
|
|
||||||
jr z, .success
|
|
||||||
ld (de), a
|
|
||||||
inc hl
|
|
||||||
inc de
|
|
||||||
djnz .loop
|
|
||||||
.success:
|
|
||||||
xor a
|
|
||||||
ld (de), a
|
|
||||||
ld a, SCRATCHPAD_SIZE-1
|
|
||||||
sub a, b
|
|
||||||
.end:
|
|
||||||
pop de
|
|
||||||
pop bc
|
|
||||||
ret
|
|
||||||
|
|
||||||
; (HL) being a string, advance it to the next non-sep character.
|
|
||||||
; Set Z if we could do it before the line ended, reset Z if we couldn't.
|
|
||||||
toWord:
|
|
||||||
.loop:
|
|
||||||
ld a, (hl)
|
|
||||||
call isLineEndOrComment
|
call isLineEndOrComment
|
||||||
jr z, .error
|
jr z, .error
|
||||||
call isSep
|
call isSep
|
||||||
jr nz, .success
|
jr nz, .read
|
||||||
|
jr .loop1
|
||||||
|
.read:
|
||||||
|
ld hl, scratchpad
|
||||||
|
ld b, SCRATCHPAD_SIZE-1
|
||||||
|
; A contains the first letter to read
|
||||||
|
.loop2:
|
||||||
|
ld (hl), a
|
||||||
inc hl
|
inc hl
|
||||||
jr .loop
|
call ioGetC
|
||||||
|
call isSepOrLineEnd
|
||||||
|
jr z, .success
|
||||||
|
djnz .loop2
|
||||||
|
; out of space. error.
|
||||||
.error:
|
.error:
|
||||||
|
; We need to put the last char we've read back so that gotoNextLine
|
||||||
|
; behaves properly.
|
||||||
|
call ioPutBack
|
||||||
call JUMP_UNSETZ
|
call JUMP_UNSETZ
|
||||||
ret
|
jr .end
|
||||||
.success:
|
.success:
|
||||||
xor a ; ensure Z
|
call ioPutBack
|
||||||
|
; null-terminate scratchpad
|
||||||
|
xor a
|
||||||
|
ld (hl), a
|
||||||
|
ld hl, scratchpad
|
||||||
|
.end:
|
||||||
|
pop bc
|
||||||
|
ret
|
||||||
|
|
||||||
|
; Read ioGetC until we reach the beginning of next line, skipping comments if
|
||||||
|
; necessary. This skips all whitespace, \n, \r, comments until we reach the
|
||||||
|
; first non-comment character. Then, we put it back (ioPutBack) and return.
|
||||||
|
;
|
||||||
|
; If gotoNextLine encounters anything else than whitespace, comment or line
|
||||||
|
; separator, we error out (no putback)
|
||||||
|
|
||||||
|
; Sets Z if we reached a new line. Unset if EOF or error.
|
||||||
|
gotoNextLine:
|
||||||
|
.loop1:
|
||||||
|
; first loop is "strict", that is: we error out on non-whitespace.
|
||||||
|
call ioGetC
|
||||||
|
call isSepOrLineEnd
|
||||||
|
ret nz ; error
|
||||||
|
or a ; cp 0
|
||||||
|
jr z, .eof
|
||||||
|
call isLineEnd
|
||||||
|
jr z, .loop3 ; good!
|
||||||
|
cp ';'
|
||||||
|
jr z, .loop2 ; comment starting, go to "fast lane"
|
||||||
|
jr .loop1
|
||||||
|
.loop2:
|
||||||
|
; second loop is the "comment loop": anything is valid and we just run
|
||||||
|
; until EOL.
|
||||||
|
call ioGetC
|
||||||
|
or a ; cp 0
|
||||||
|
jr z, .eof
|
||||||
|
cp '\' ; special case: '\' doesn't count as a line end
|
||||||
|
; in a comment.
|
||||||
|
jr z, .loop2
|
||||||
|
call isLineEnd
|
||||||
|
jr z, .loop3
|
||||||
|
jr .loop2
|
||||||
|
.loop3:
|
||||||
|
; Loop 3 happens after we reach our first line sep. This means that we
|
||||||
|
; wade through whitespace until we reach a non-whitespace character.
|
||||||
|
call ioGetC
|
||||||
|
or a ; cp 0
|
||||||
|
jr z, .eof
|
||||||
|
cp ';'
|
||||||
|
jr z, .loop2 ; oh, another comment! go back to loop2!
|
||||||
|
call isSepOrLineEnd
|
||||||
|
jr z, .loop3
|
||||||
|
; Non-whitespace. That's our goal! Put it back
|
||||||
|
call ioPutBack
|
||||||
|
.eof:
|
||||||
|
cp a ; ensure Z
|
||||||
ret
|
ret
|
||||||
|
|
||||||
; Parse line in (HL) and read the next token in BC. The token is written on
|
; Parse line in (HL) and read the next token in BC. The token is written on
|
||||||
@ -113,17 +172,27 @@ toWord:
|
|||||||
; Advance HL to after the read word.
|
; Advance HL to after the read word.
|
||||||
; If no token matches, TOK_BAD is written to B
|
; If no token matches, TOK_BAD is written to B
|
||||||
tokenize:
|
tokenize:
|
||||||
call toWord
|
|
||||||
jr nz, .emptyline
|
|
||||||
call readWord
|
call readWord
|
||||||
push hl ; Save advanced HL for later
|
jr z, .process ; read successful, process into token.
|
||||||
ld hl, scratchpad
|
; Error. It could be EOL, EOF or scraptchpad size problem
|
||||||
|
; Whatever it is, calling gotoNextLine is appropriate. If it's EOL
|
||||||
|
; that's obviously what we want to do. If it's EOF, we can check
|
||||||
|
; it after. If it's a scratchpad overrun, gotoNextLine handles it.
|
||||||
|
call gotoNextLine
|
||||||
|
jr nz, .error
|
||||||
|
or a ; Are we EOF?
|
||||||
|
jr nz, tokenize ; not EOF? then continue!
|
||||||
|
; We're EOF
|
||||||
|
ld b, TOK_EOF
|
||||||
|
ret
|
||||||
|
.process:
|
||||||
call isLabel
|
call isLabel
|
||||||
jr z, .label
|
jr z, .label
|
||||||
call getInstID
|
call getInstID
|
||||||
jr z, .instr
|
jr z, .instr
|
||||||
call getDirectiveID
|
call getDirectiveID
|
||||||
jr z, .direc
|
jr z, .direc
|
||||||
|
.error:
|
||||||
; no match
|
; no match
|
||||||
ld b, TOK_BAD
|
ld b, TOK_BAD
|
||||||
jr .end
|
jr .end
|
||||||
@ -137,9 +206,4 @@ tokenize:
|
|||||||
ld b, TOK_LABEL
|
ld b, TOK_LABEL
|
||||||
.end:
|
.end:
|
||||||
ld c, a
|
ld c, a
|
||||||
pop hl
|
|
||||||
ret
|
|
||||||
.emptyline:
|
|
||||||
ld b, TOK_EMPTY
|
|
||||||
; no HL to pop, we jumped before the push
|
|
||||||
ret
|
ret
|
||||||
|
Loading…
Reference in New Issue
Block a user