From 49228e418c3035b7dca1f94acab36b465805666a Mon Sep 17 00:00:00 2001
From: Virgil Dupras <hsoft@hardcoded.net>
Date: Sat, 7 Mar 2020 12:13:15 -0500
Subject: [PATCH] apps/forth: new (WIP) application

---
 apps/forth/README.md      |   7 +++
 apps/forth/dict.asm       | 110 ++++++++++++++++++++++++++++++++++++++++++++++
 apps/forth/dictionary.txt |  10 +++++
 apps/forth/glue.asm       |  10 +++++
 apps/forth/main.asm       |  36 +++++++++++++++
 apps/forth/stack.asm      |  25 +++++++++++
 apps/forth/util.asm       | 101 ++++++++++++++++++++++++++++++++++++++++++
 emul/.gitignore           |   1 +
 emul/Makefile             |   2 +-
 9 files changed, 301 insertions(+), 1 deletion(-)
 create mode 100644 apps/forth/README.md
 create mode 100644 apps/forth/dict.asm
 create mode 100644 apps/forth/dictionary.txt
 create mode 100644 apps/forth/glue.asm
 create mode 100644 apps/forth/main.asm
 create mode 100644 apps/forth/stack.asm
 create mode 100644 apps/forth/util.asm
diff --git a/apps/forth/README.md b/apps/forth/README.md
new file mode 100644
index 0000000..14d40c5
--- /dev/null
+++ b/apps/forth/README.md
@@ -0,0 +1,7 @@
+# Forth
+
+**WIP** A Forth interpreter. Far from complete, but you can do stuff like
+
+    KEY EMIT KEY EMIT
+
+See dictionary.txt for a word reference.
diff --git a/apps/forth/dict.asm b/apps/forth/dict.asm
new file mode 100644
index 0000000..ccef6e4
--- /dev/null
+++ b/apps/forth/dict.asm
@@ -0,0 +1,110 @@
+; A dictionary entry has this structure:
+; - 8b name (zero-padded)
+; - 2b prev pointer
+; - 2b code pointer
+; - Parameter field area (PFA)
+;
+; The code pointer point to "word routines". These routines expect to be called
+; with IY pointing to the PFA. They themselves are expected to end by jumping
+; to the address at the top of the Return Stack. They will usually do so with
+; "jp exit".
+
+; Execute a word containing native code at its PFA
+nativeWord:
+	jp	(iy)
+
+; Execute a compiled word containing a list of references to other words,
+; usually ended by a reference to EXIT.
+; A reference to a word in a compiledWord section is *not* a direct reference,
+; but a word+CODELINK_OFFSET reference. Therefore, for a code link "link",
+; (link) is the routine to call.
+compiledWord:
+	push	iy \ pop hl
+	inc	hl
+	inc	hl
+	; HL points to next Interpreter pointer.
+	call	pushRS
+	ld	l, (iy)
+	ld	h, (iy+1)
+	push	hl \ pop iy
+	; IY points to code link
+	jp	executeCodeLink
+
+; ( R:I -- )
+EXIT:
+	.db "EXIT", 0, 0, 0, 0
+	.dw 0
+	.dw nativeWord
+; When we call the EXIT word, we have to do a "double exit" because our current
+; Interpreter pointer is pointing to the word *next* to our EXIT reference when,
+; in fact, we want to continue processing the one above it.
+	call	popRS
+exit:
+	call	popRS
+	; We have a pointer to a word
+	push	hl \ pop iy
+	jp	compiledWord
+
+; ( c -- )
+EMIT:
+	.db "EMIT", 0, 0, 0, 0
+	.dw EXIT
+	.dw nativeWord
+	pop	hl
+	ld	a, l
+	call	stdioPutC
+	jp	exit
+
+; ( addr -- )
+EXECUTE:
+	.db "EXECUTE", 0
+	.dw EMIT
+	.dw nativeWord
+	pop	iy	; Points to word_offset
+	ld	de, CODELINK_OFFSET
+	add	iy, de
+executeCodeLink:
+	ld	l, (iy)
+	ld	h, (iy+1)
+	; HL points to code pointer
+	inc	iy
+	inc	iy
+	; IY points to PFA
+	jp	(hl)	; go!
+
+; ( -- c )
+KEY:
+	.db "KEY", 0, 0, 0, 0, 0
+	.dw EXECUTE
+	.dw nativeWord
+	call	stdioGetC
+	ld	h, 0
+	ld	l, a
+	push	hl
+	jp	exit
+
+INTERPRET:
+	.db "INTERPRE"
+	.dw KEY
+	.dw nativeWord
+	call	pad
+	push	hl \ pop iy
+	call	stdioReadLine
+	ld	(INPUTPOS), hl
+.loop:
+	call	readword
+	jp	nz, .loopend
+	call	compile
+	jr	nz, .notfound
+	jr	.loop
+.loopend:
+	call	compileExit
+	call	pad
+	push	hl \ pop iy
+	jp	compiledWord
+.notfound:
+	ld	hl, .msg
+	call	printstr
+	jp	exit
+.msg:
+	.db	"not found", 0
diff --git a/apps/forth/dictionary.txt b/apps/forth/dictionary.txt
new file mode 100644
index 0000000..2599b02
--- /dev/null
+++ b/apps/forth/dictionary.txt
@@ -0,0 +1,10 @@
+Stack notation: "<stack before> -- <stack after>". Rightmost is top of stack
+                (TOS). For example, in "a b -- c d", b is TOS before, d is TOS
+                after. "R:" means that the Return Stack is modified.
+
+EMIT        c --            Spit char c to stdout
+EXECUTE     a --            Execute word at addr a
+EXIT        R:I --          Exit a colon definition
+KEY         -- c            Get char c from stdin
+INTERPRET   --              Get a line from stdin, compile it in tmp memory,
+                            then execute the compiled contents.
diff --git a/apps/forth/glue.asm b/apps/forth/glue.asm
new file mode 100644
index 0000000..32bcded
--- /dev/null
+++ b/apps/forth/glue.asm
@@ -0,0 +1,10 @@
+.inc "user.h"
+jp	forthMain
+
+.inc "core.asm"
+.equ FORTH_RAMSTART RAMSTART
+.inc "forth/main.asm"
+.inc "forth/util.asm"
+.inc "forth/stack.asm"
+.inc "forth/dict.asm"
+RAMSTART:
diff --git a/apps/forth/main.asm b/apps/forth/main.asm
new file mode 100644
index 0000000..331885b
--- /dev/null
+++ b/apps/forth/main.asm
@@ -0,0 +1,36 @@
+; *** Const ***
+; Base of the Return Stack
+.equ	RS_ADDR		0xf000
+; Number of bytes we keep as a padding between HERE and the scratchpad
+.equ	PADDING		0x20
+; Offset of the code link relative to the beginning of the word
+.equ	CODELINK_OFFSET	10
+
+; *** Variables ***
+.equ	INITIAL_SP	FORTH_RAMSTART
+.equ	CURRENT		@+2
+.equ	HERE		@+2
+.equ	INPUTPOS	@+2
+.equ	FORTH_RAMEND	@+2
+
+; *** Code ***
+MAIN:
+	.dw compiledWord
+	.dw INTERPRET+CODELINK_OFFSET
+	.dw ENDPGM
+
+ENDPGM:
+	.dw nativeWord
+	ld	sp, (INITIAL_SP)
+	xor	a
+	ret
+
+forthMain:
+	ld	(INITIAL_SP), sp
+	ld	hl, INTERPRET	; last entry in hardcoded dict
+	ld	(CURRENT), hl
+	ld	hl, FORTH_RAMEND
+	ld	(HERE), hl
+	ld	ix, RS_ADDR
+	ld	iy, MAIN
+	jp	executeCodeLink
diff --git a/apps/forth/stack.asm b/apps/forth/stack.asm
new file mode 100644
index 0000000..ebad3d2
--- /dev/null
+++ b/apps/forth/stack.asm
@@ -0,0 +1,25 @@
+; The Parameter stack (PS) is maintained by SP and the Return stack (RS) is
+; maintained by IX. This allows us to generally use push and pop freely because
+; PS is the most frequently used. However, this causes a problem with routine
+; calls: because in Forth, the stack isn't balanced within each call, our return
+; offset, when placed by a CALL, messes everything up. This is one of the
+; reasons why we need stack management routines below.
+;
+; This return stack contain "Interpreter pointers", that is a pointer to the
+; address of a word, as seen in a compiled list of words.
+
+; Push value HL to RS
+pushRS:
+	ld	(ix), l
+	inc	ix
+	ld	(ix), h
+	inc	ix
+	ret
+
+; Pop RS' TOS to HL
+popRS:
+	dec ix
+	ld	h, (ix)
+	dec ix
+	ld	l, (ix)
+	ret
diff --git a/apps/forth/util.asm b/apps/forth/util.asm
new file mode 100644
index 0000000..daa78ba
--- /dev/null
+++ b/apps/forth/util.asm
@@ -0,0 +1,101 @@
+; Return address of scratchpad in HL
+pad:
+	ld	hl, (HERE)
+	ld	de, PADDING
+	add	hl, de
+	ret
+
+; Read word from (INPUTPOS) and return, in HL, a null-terminated word.
+; Advance (INPUTPOS) to the character following the whitespace ending the
+; word.
+; Z set of word was read, unset if end of line.
+readword:
+	ld	hl, (INPUTPOS)
+	; skip leading whitespace
+	dec	hl	; offset leading "inc hl"
+.loop1:
+	inc	hl
+	ld	a, (hl)
+	or	a
+	jr	z, .empty
+	cp	' '+1
+	jr	c, .loop1
+	push	hl		; --> lvl 1. that's our result
+.loop2:
+	inc	hl
+	ld	a, (hl)
+	; special case: is A null? If yes, we will *not* inc A so that we don't
+	; go over the bounds of our input string.
+	or	a
+	jr	z, .noinc
+	cp	' '+1
+	jr	nc, .loop2
+	; we've just read a whitespace, HL is pointing to it. Let's transform
+	; it into a null-termination, inc HL, then set (INPUTPOS).
+	xor	a
+	ld	(hl), a
+	inc	hl
+.noinc:
+	ld	(INPUTPOS), hl
+	pop	hl		; <-- lvl 1. our result
+	ret	; Z set from XOR A
+.empty:
+	ld	(hl), a
+	inc	a	; unset Z
+	ret
+
+; For DE pointing to a dict entry, set DE to point to the previous entry.
+; Z is set if DE point to 0 (no entry). NZ if not.
+prev:
+	push	hl		; --> lvl 1
+	ld	hl, 8		; prev field offset
+	add	hl, de
+	ex	de, hl
+	pop	hl		; <-- lvl 1
+	call	intoDE
+	; DE points to prev. Is it zero?
+	xor	a
+	or	d
+	or	e
+	; Z will be set if DE is zero
+	ret
+
+; Find the entry corresponding to word where (HL) points to and sets DE to
+; point to that entry.
+; Z if found, NZ if not.
+find:
+	ld	de, (CURRENT)
+.inner:
+	ld	a, 8
+	call	strncmp
+	ret	z		; found
+	call	prev
+	jr	nz, .inner
+	; Z set? end of dict unset Z
+	inc	a
+	ret
+
+; Compile word string at (HL) and write down its compiled version in IY,
+; advancing IY to the byte next to the last written byte.
+; Set Z on success, unset on failure.
+compile:
+	call	find
+	ret	nz
+	; DE is a word offset, we need a code link
+	ld	hl, CODELINK_OFFSET
+	add	hl, de
+	ld	(iy), l
+	inc	iy
+	ld	(iy), h
+	inc	iy
+	xor	a	; set Z
+	ret
+
+compileExit:
+	ld	hl, EXIT+CODELINK_OFFSET
+	ld	(iy), l
+	inc	iy
+	ld	(iy), h
+	inc	iy
+	ret
+
diff --git a/emul/.gitignore b/emul/.gitignore
index db7da81..9ee7d93 100644
--- a/emul/.gitignore
+++ b/emul/.gitignore
@@ -8,4 +8,5 @@
 /cfsin/zasm
 /cfsin/ed
 /cfsin/basic
+/cfsin/forth
 /cfsin/user.h
diff --git a/emul/Makefile b/emul/Makefile
index ef0fa05..5cd6d13 100644
--- a/emul/Makefile
+++ b/emul/Makefile
@@ -4,7 +4,7 @@ KERNEL = ../kernel
 APPS = ../apps
 ZASMBIN = zasm/zasm
 AVRABIN = zasm/avra
-SHELLAPPS = zasm ed
+SHELLAPPS = zasm ed forth
 SHELLTGTS = ${SHELLAPPS:%=cfsin/%}
 CFSIN_CONTENTS = $(SHELLTGTS) cfsin/user.h
 OBJS = emul.o libz80/libz80.o