zasm: big I/O overhaul

Instead of buffering input in memory one line at a time, we go in "just in time" mode and always read contents directly from I/O, without buffering. It forces us to implement a `ioPutback` scheme, but on the other hand it greatly simplifies cases where multiple tokens are on the same line (when a label is directly followed by an instruction). The end result feels much more solid and less hackish.
2019-05-16 07:53:42 -04:00 · 2019-05-16 07:53:42 -04:00 · e9244b80ee
commit e9244b80ee
parent 34ff0a6c2a
5 changed files with 164 additions and 144 deletions
--- a/apps/zasm/directive.asm
+++ b/apps/zasm/directive.asm
@ -24,7 +24,6 @@ directiveHandlers:

 handleDB:
 	push	hl
-	call	toWord
 	call	readWord
 	ld	hl, scratchpad
 	call	parseLiteral
@ -36,7 +35,6 @@ handleDB:

 handleDW:
 	push	hl
-	call	toWord
 	call	readWord
 	ld	hl, scratchpad
 	call	parseExpr
@ -51,7 +49,9 @@ handleDW:
 handleEQU:
 	call	zasmIsFirstPass
 	jr	nz, .begin
-	; first pass? .equ are noops
+	; first pass? .equ are noops Consume args and return
+	call	readWord
+	call	readWord
 	xor	a
 	ret
 .begin:
@ -59,19 +59,14 @@ handleEQU:
 	push	de
 	push	bc
 	; Read our constant name
-	call	toWord
 	call	readWord
 	; We can't register our symbol yet: we don't have our value!
 	; Let's copy it over.
-	push	hl
-		ld	hl, scratchpad
-		ld	de, DIREC_SCRATCHPAD
-		ld	bc, SCRATCHPAD_SIZE
-		ldir
-	pop	hl
+	ld	de, DIREC_SCRATCHPAD
+	ld	bc, SCRATCHPAD_SIZE
+	ldir

 	; Now, read the value associated to it
-	call	toWord
 	call	readWord
 	ld	hl, scratchpad
 	call	parseExpr
@ -102,7 +97,7 @@ getDirectiveID:
 	pop	bc
 	ret

-; Parse directive specified in A (D_* const) with args in (HL) and act in
+; Parse directive specified in A (D_* const) with args in I/O and act in
 ; an appropriate manner. If the directive results in writing data at its
 ; current location, that data is in (direcData) and A is the number of bytes
 ; in it.
--- a/apps/zasm/instr.asm
+++ b/apps/zasm/instr.asm
@ -708,16 +708,14 @@ getUpcode:
 	pop	ix
 	ret

-; Parse next argument in string (HL) and place it in (DE)
+; Parse next argument in I/O and place it in (DE)
 ; Sets Z on success, reset on error.
 processArg:
-	call	toWord
 	call	readWord
-	; Read word is in scratchpad, HL is properly advanced. Now, let's push
+	jr	nz, .noarg
+	; Read word is in (HL). Now, let's push
 	; that HL value and replace it with (scratchpad) so that we can parse
 	; that arg.
-	push	hl
-	ld	hl, scratchpad

 	call	parseArg
 	cp	0xff
@ -733,15 +731,18 @@ processArg:
 	ld	a, ixh
 	ld	(de), a
 	cp	a		; ensure Z is set
-	jr	.end
+	ret
 .error:
 	call	JUMP_UNSETZ
-.end:
-	pop	hl
+	ret
+.noarg:
+	xor	a
+	ld	(de), a
 	ret

-; Parse instruction specified in A (I_* const) with args in (HL) and write
-; resulting opcode(s) in (instrUpcode). Returns the number of bytes written in A.
+; Parse instruction specified in A (I_* const) with args in I/O and write
+; resulting opcode(s) in (instrUpcode). Returns the number of bytes written in
+; A.
 parseInstruction:
 	push	bc
 	push	hl
--- a/apps/zasm/io.asm
+++ b/apps/zasm/io.asm
@ -1,5 +1,3 @@
-; *** Consts ***
-.equ	IO_MAX_LINELEN	0xff
 ; *** Variables ***
 .equ	IO_IN_GETC	IO_RAMSTART
 .equ	IO_IN_PUTC	IO_IN_GETC+2
@ -9,14 +7,36 @@
 .equ	IO_OUT_PUTC	IO_OUT_GETC+2
 .equ	IO_OUT_SEEK	IO_OUT_PUTC+2
 .equ	IO_OUT_TELL	IO_OUT_SEEK+2
-.equ	IO_LINEBUF	IO_OUT_TELL+2
-.equ	IO_RAMEND	IO_LINEBUF+IO_MAX_LINELEN+1
+; see ioPutBack below
+.equ	IO_PUTBACK_BUF	IO_OUT_TELL+2
+.equ	IO_RAMEND	IO_PUTBACK_BUF+1

 ; *** Code ***

+ioInit:
+	xor	a
+	ld	(IO_PUTBACK_BUF), a
+	ret
+
 ioGetC:
+	ld	a, (IO_PUTBACK_BUF)
+	or	a		; cp 0
+	jr	nz, .getback
 	ld	ix, (IO_IN_GETC)
 	jp	(ix)
+.getback:
+	push	af
+	xor	a
+	ld	(IO_PUTBACK_BUF), a
+	pop	af
+	ret
+
+; Put back non-zero character A into the "ioGetC stack". The next ioGetC call,
+; instead of reading from IO_IN_GETC, will return that character. That's the
+; easiest way I found to handle the readWord/gotoNextLine problem.
+ioPutBack:
+	ld	(IO_PUTBACK_BUF), a
+	ret

 ioPutC:
 	ld	ix, (IO_OUT_PUTC)
@ -26,54 +46,3 @@ ioSeek:
 	ld	ix, (IO_IN_SEEK)
 	jp	(ix)

-; Sets Z is A is CR, LF, or null.
-isLineEnd:
-	or	a	; same as cp 0
-	ret	z
-	cp	0x0d
-	ret	z
-	cp	0x0a
-	ret	z
-	cp	'\'
-	ret
-
-; Read a single line from ioGetCPtr and place it in IO_LINEBUF.
-; Returns number of chars read in A. 0 means we're at the end of our input
-; stream, which happens when GetC unsets Z. Make HL point to IO_LINEBUF.
-; We ignore empty lines and pass through them like butter.
-; A null char is written at the end of the line.
-ioReadLine:
-	push	bc
-	; consume ioGetC as long as it yields a line end char.
-.loop1:
-	call	ioGetC
-	jr	nz, .eof	; GetC unsets Z? We don't have a line to read,
-				; we have EOF.
-	call	isLineEnd
-	jr	z, .loop1
-	; A contains the first char of our line.
-	ld	c, 1
-	ld	(IO_LINEBUF), a
-	ld	hl, IO_LINEBUF+1
-.loop2:
-	call	ioGetC
-	call	isLineEnd
-	jr	z, .success	; We have end of line
-	ld	(hl), a
-	inc	hl
-	inc	c
-	jr	.loop2
-
-.success:
-	; write null char at HL before we return
-	xor	a
-	ld	(hl), a
-	ld	a, c
-	ld	hl, IO_LINEBUF
-	jr	.end
-.eof:
-	xor	a
-.end:
-	pop	bc
-	ret
-
--- a/apps/zasm/main.asm
+++ b/apps/zasm/main.asm
@ -67,6 +67,7 @@ zasmMain:
 	ld	de, IO_OUT_GETC
 	call	JUMP_BLKSEL
 	; Init modules
+	call	ioInit
 	call	symInit

 	; First pass
@ -105,17 +106,18 @@ zasmParseFile:
 	ld	(ZASM_PC), de
 .loop:
 	inc	de
-	call	ioReadLine
-	or	a		; is A 0?
-	ret	z		; We have EOF
 	call	parseLine
 	ret	nz		; error
+	ld	a, b		; TOK_*
+	cp	TOK_EOF
+	ret	z		; if EOF, return now with success
 	jr	.loop

-; Parse line in (HL), write the resulting opcode(s) through ioPutC and increases
-; (ZASM_PC) by the number of bytes written. BC is set to the result of the call
-; to tokenize.
-; Sets Z if parse was successful, unset if there was an error or EOF.
+; Parse next token and accompanying args (when relevant) in I/O, write the
+; resulting opcode(s) through ioPutC and increases (ZASM_PC) by the number of
+; bytes written. BC is set to the result of the call to tokenize.
+; Sets Z if parse was successful, unset if there was an error. EOF is not an
+; error.
 parseLine:
 	call	tokenize
 	ld	a, b		; TOK_*
@ -124,22 +126,11 @@ parseLine:
 	cp	TOK_DIRECTIVE
 	jp	z, _parseDirec
 	cp	TOK_LABEL
-	jr	z, .label
-	cp	TOK_EMPTY
-	ret			; Z is correct. If empty, Z is set and not an
+	jr	z, _parseLabel
+	cp	TOK_EOF
+	ret			; Z is correct. If EOF, Z is set and not an
 				; error, otherwise, it means bad token and
 				; errors out.
-.label:
-	push	hl
-	call	_parseLabel
-	pop	hl
-	ret	nz		; error out
-	; We're finished here. However, because it's a label, it's possible that
-	; another logical line follows directly after the label. Let's parse
-	; this and propagate error.
-	call	parseLine
-	; Z has proper value
-	ret

 _parseInstr:
 	ld	a, c		; I_*
--- a/apps/zasm/tok.asm
+++ b/apps/zasm/tok.asm
@ -2,7 +2,7 @@
 TOK_INSTR	.equ	0x01
 TOK_DIRECTIVE	.equ	0x02
 TOK_LABEL	.equ	0x03
-TOK_EMPTY	.equ	0xfe	; not a bad token, just an empty line
+TOK_EOF		.equ	0xfe	; end of file
 TOK_BAD		.equ	0xff

 .equ	SCRATCHPAD_SIZE	0x20
@ -16,7 +16,17 @@ scratchpad:
 isLineEndOrComment:
 	cp	';'
 	ret	z
-	or	a	; cp 0
+	; continue to isLineEnd
+
+; Sets Z is A is CR, LF, or null.
+isLineEnd:
+	or	a	; same as cp 0
+	ret	z
+	cp	0x0d
+	ret	z
+	cp	0x0a
+	ret	z
+	cp	'\'
 	ret

 ; Sets Z is A is ' ' '\t' or ','
@ -32,8 +42,7 @@ isSep:
 isSepOrLineEnd:
 	call	isSep
 	ret	z
-	call	isLineEndOrComment
-	ret
+	jr	isLineEndOrComment

 ; Checks whether string at (HL) is a label, that is, whether it ends with a ":"
 ; Sets Z if yes, unset if no.
@ -63,48 +72,98 @@ isLabel:
 	pop	hl
 	ret

-; read word in (HL) and put it in (scratchpad), null terminated, for a maximum
-; of SCRATCHPAD_SIZE-1 characters. As a result, A is the read length. HL is
-; advanced to the next separator char.
+; Read ioGetC until a word starts, then read ioGetC as long as there is no
+; separator and put that contents in (scratchpad), null terminated, for a
+; maximum of SCRATCHPAD_SIZE-1 characters.
+; If EOL (\n, \r or comment) or EOF is hit before we could read a word, we stop
+; right there. If scratchpad is not big enough, we stop right there and error.
+; HL points to scratchpad
+; Sets Z if a word could be read, unsets if not.
 readWord:
 	push	bc
-	push	de
-	ld	de, scratchpad
-	ld	b, SCRATCHPAD_SIZE-1
-.loop:
-	ld	a, (hl)
-	call	isSepOrLineEnd
-	jr	z, .success
-	ld	(de), a
-	inc	hl
-	inc	de
-	djnz	.loop
-.success:
-	xor	a
-	ld	(de), a
-	ld	a, SCRATCHPAD_SIZE-1
-	sub	a, b
-.end:
-	pop	de
-	pop	bc
-	ret
-
-; (HL) being a string, advance it to the next non-sep character.
-; Set Z if we could do it before the line ended, reset Z if we couldn't.
-toWord:
-.loop:
-	ld	a, (hl)
+	; Get to word
+.loop1:
+	call	ioGetC
 	call	isLineEndOrComment
 	jr	z, .error
 	call	isSep
-	jr	nz, .success
+	jr	nz, .read
+	jr	.loop1
+.read:
+	ld	hl, scratchpad
+	ld	b, SCRATCHPAD_SIZE-1
+	; A contains the first letter to read
+.loop2:
+	ld	(hl), a
 	inc	hl
-	jr	.loop
+	call	ioGetC
+	call	isSepOrLineEnd
+	jr	z, .success
+	djnz	.loop2
+	; out of space. error.
 .error:
+	; We need to put the last char we've read back so that gotoNextLine
+	; behaves properly.
+	call	ioPutBack
 	call	JUMP_UNSETZ
-	ret
+	jr	.end
 .success:
-	xor	a	; ensure Z
+	call	ioPutBack
+	; null-terminate scratchpad
+	xor	a
+	ld	(hl), a
+	ld	hl, scratchpad
+.end:
+	pop	bc
+	ret
+
+; Read ioGetC until we reach the beginning of next line, skipping comments if
+; necessary. This skips all whitespace, \n, \r, comments until we reach the
+; first non-comment character. Then, we put it back (ioPutBack) and return.
+;
+; If gotoNextLine encounters anything else than whitespace, comment or line
+; separator, we error out (no putback)
+
+; Sets Z if we reached a new line. Unset if EOF or error.
+gotoNextLine:
+.loop1:
+	; first loop is "strict", that is: we error out on non-whitespace.
+	call	ioGetC
+	call	isSepOrLineEnd
+	ret	nz		; error
+	or	a		; cp 0
+	jr	z, .eof
+	call	isLineEnd
+	jr	z, .loop3	; good!
+	cp	';'
+	jr	z, .loop2	; comment starting, go to "fast lane"
+	jr	.loop1
+.loop2:
+	; second loop is the "comment loop": anything is valid and we just run
+	; until EOL.
+	call	ioGetC
+	or	a		; cp 0
+	jr	z, .eof
+	cp	'\'		; special case: '\' doesn't count as a line end
+				; in a comment.
+	jr	z, .loop2
+	call	isLineEnd
+	jr	z, .loop3
+	jr	.loop2
+.loop3:
+	; Loop 3 happens after we reach our first line sep. This means that we
+	; wade through whitespace until we reach a non-whitespace character.
+	call	ioGetC
+	or	a		; cp 0
+	jr	z, .eof
+	cp	';'
+	jr	z, .loop2	; oh, another comment! go back to loop2!
+	call	isSepOrLineEnd
+	jr	z, .loop3
+	; Non-whitespace. That's our goal! Put it back
+	call	ioPutBack
+.eof:
+	cp	a		; ensure Z
 	ret

 ; Parse line in (HL) and read the next token in BC. The token is written on
@ -113,17 +172,27 @@ toWord:
 ; Advance HL to after the read word.
 ; If no token matches, TOK_BAD is written to B
 tokenize:
-	call	toWord
-	jr	nz, .emptyline
 	call	readWord
-	push	hl		; Save advanced HL for later
-	ld	hl, scratchpad
+	jr	z, .process	; read successful, process into token.
+	; Error. It could be EOL, EOF or scraptchpad size problem
+	; Whatever it is, calling gotoNextLine is appropriate. If it's EOL
+	; that's obviously what we want to do. If it's EOF, we can check
+	; it after. If it's a scratchpad overrun, gotoNextLine handles it.
+	call	gotoNextLine
+	jr	nz, .error
+	or	a		; Are we EOF?
+	jr	nz, tokenize	; not EOF? then continue!
+	; We're EOF
+	ld	b, TOK_EOF
+	ret
+.process:
 	call	isLabel
 	jr	z, .label
 	call	getInstID
 	jr	z, .instr
 	call	getDirectiveID
 	jr	z, .direc
+.error:
 	; no match
 	ld	b, TOK_BAD
 	jr	.end
@ -137,9 +206,4 @@ tokenize:
 	ld	b, TOK_LABEL
 .end:
 	ld	c, a
-	pop	hl
-	ret
-.emptyline:
-	ld	b, TOK_EMPTY
-	; no HL to pop, we jumped before the push
 	ret