forth: Simplify execution model

Change the mainloop so that words are executed immediately after they're read.
This greatly simplifies execution model and allow the "DEFINE" word to become
an IMMEDIATE and stop its "copy from compiled words" scheme.

The downside to this is that flow control words no longer work when being used
directly in the input buffer. They only work as part of a definition.

It also broke "RECURSE", but I've replaced it with "BEGIN" and "AGAIN".

Another effect of this change is that definitions can now span multiple lines.

All in all, it feels good to get rid of that COMPBUF...
This commit is contained in:
Virgil Dupras 2020-03-15 22:46:17 -04:00
parent 7befe56597
commit 80ab395823
5 changed files with 150 additions and 284 deletions

View File

@ -4,16 +4,15 @@
: ALLOT HERE +! ;
: , H ! 2 ALLOT ;
: C, H C! 1 ALLOT ;
: BEGIN H ; IMMEDIATE
: AGAIN ['] (bbr) , H -^ C, ; IMMEDIATE
: NOT 1 SWAP SKIP? EXIT 0 * ;
: RECURSE R> R> 2 - >R >R EXIT ;
: ( LIT@ ) WORD SCMP NOT SKIP? RECURSE ; IMMEDIATE
: ( BEGIN LITS ) WORD SCMP NOT SKIP? AGAIN ; IMMEDIATE
( Hello, hello, krkrkrkr... do you hear me?
Ah, voice at last! Some lines above need comments
BTW: Forth lines limited to 64 cols because of default
input buffer size in Collapse OS
NOT: a bit convulted because we don't have IF yet
RECURSE: RS TOS is for RECURSE itself, then we have to dig
one more level to get to RECURSE's parent's caller.
IF true, skip following (fbr). Also, push br cell ref H,
to PS )
: IF ['] SKIP? , ['] (fbr) , H 1 ALLOT ; IMMEDIATE

View File

@ -97,17 +97,14 @@ NUMBER:
; Similarly to numberWord, this is not a real word, but a string literal.
; Instead of being followed by a 2 bytes number, it's followed by a
; null-terminated string. This is not expected to be called in a regular
; context. Only words expecting those literals will look for them. This is why
; the litWord triggers abort.
; null-terminated string. When called, puts the string's address on PS
litWord:
ld hl, (IP)
call printstr ; let's print the word before abort.
ld hl, .msg
call printstr
jp abort
.msg:
.db "undefined word", 0
push hl
call strskip
inc hl ; after null termination
ld (IP), hl
jp next
.db 0b10 ; Flags
LIT:
@ -142,11 +139,24 @@ quit:
ABORT:
.dw nativeWord
abort:
; flush rest of input
ld hl, (INPUTPOS)
xor a
ld (hl), a
; Reinitialize PS (RS is reinitialized in forthInterpret)
ld sp, (INITIAL_SP)
jp forthRdLineNoOk
ABORTREF:
.dw ABORT
; prints msg in (HL) then aborts
abortMsg:
call printstr
jr abort
abortUnknownWord:
ld hl, .msg
jr abortMsg
.msg:
.db "unknown word", 0
.db "BYE"
.fill 4
@ -220,7 +230,7 @@ EXECUTE:
.db 1 ; IMMEDIATE
COMPILE:
.dw nativeWord
pop hl ; word addr
call readword
call find
jr nz, .maybeNum
ex de, hl
@ -242,29 +252,11 @@ COMPILE:
call .writeDE
jp next
.undef:
; When encountering an undefined word during compilation, we spit a
; reference to litWord, followed by the null-terminated word.
; This way, if a preceding word expect a string literal, it will read it
; by calling readLIT, and if it doesn't, the routine will be
; called, triggering an abort.
ld de, LIT
call .writeDE
ld de, (HERE)
call strcpyM
ld (HERE), de
jp next
call printstr
jp abortUnknownWord
.immed:
; For this IMMEDIATE word to be compatible with regular execution model,
; it needs to be compiled as an atom somewhere in memory.
; For example, RECURSE backtracks in RS and steps back 2 bytes. This
; can only work with our compiled atom being next to an EXIT atom.
ex de, hl ; atom to write in DE
ld hl, (OLDHERE)
push hl \ pop iy
call DEinHL
ld de, EXIT
call DEinHL
jp compiledWord
push hl
jp EXECUTE+2
.writeDE:
push hl
ld hl, (HERE)
@ -274,56 +266,49 @@ COMPILE:
ret
.db ";"
.db ":"
.fill 6
.dw COMPILE
.db 0
ENDDEF:
.dw nativeWord
jp EXIT+2
.db ":"
.fill 6
.dw ENDDEF
.db 0
.db 1 ; IMMEDIATE
DEFINE:
.dw nativeWord
call entryhead
ld de, compiledWord
call DEinHL
; At this point, we've processed the name literal following the ':'.
; What's next? We have, in IP, a pointer to words that *have already
; been compiled by INTERPRET*. All those bytes will be copied as-is.
; All we need to do is to know how many bytes to copy. To do so, we
; skip compwords until EXIT is reached.
ex de, hl ; DE is our dest
ld (HERE), de ; update HERE
ld hl, (IP)
ld (HERE), hl
.loop:
push de ; --> lvl 1
ld de, ENDDEF
call HLPointsDE
pop de ; <-- lvl 1
jr z, .loopend
call compSkip
jr .loop
.loopend:
; skip EXIT
inc hl \ inc hl
; We have out end offset. Let's get our offset
ld de, (IP)
or a ; clear carry
sbc hl, de
; HL is our copy count.
ld b, h
ld c, l
; did we reach ";"?
ld hl, (INPUTPOS)
ld a, (hl)
cp ';'
jr nz, .compile
inc hl
ld a, (hl)
cp ' '+1
jr c, .loopend ; whitespace, we have semicol. end
.compile:
ld hl, (IP)
ld de, (HERE) ; recall dest
; copy!
ldir
call pushRS
ld hl, .retRef
ld (IP), hl
ld (HERE), de
ld hl, COMPILE
push hl
jp EXECUTE+2
.loopend:
; Advance (INPUTPOS) to after semicol. HL is already there.
ld (INPUTPOS), hl
; write EXIT and return
ld hl, (HERE)
ld de, EXIT
call DEinHL
ld (HERE), hl
jp next
.retRef:
.dw $+2
.dw $+2
call popRS
ld (IP), hl
jr .loop
.db "DOES>"
@ -359,10 +344,11 @@ IMMEDIATE:
jp next
; ( n -- )
.db "LITERAL"
.dw IMMEDIATE
.db 1 ; IMMEDIATE
LITERAL:
.db "LITN"
.fill 3
.dw IMMEDIATE
.db 1 ; IMMEDIATE
LITN:
.dw nativeWord
ld hl, (HERE)
ld de, NUMBER
@ -372,14 +358,28 @@ LITERAL:
ld (HERE), hl
jp next
.db "LITS"
.fill 3
.dw LITN
.db 1 ; IMMEDIATE
LITS:
.dw nativeWord
ld hl, (HERE)
ld de, LIT
call DEinHL
ex de, hl ; (HERE) in DE
call readword
call strcpyM
ld (HERE), de
jp next
.db "'"
.fill 6
.dw LITERAL
.dw LITS
.db 0
APOS:
.dw nativeWord
call readLITBOS
call readword
call find
jr nz, .notfound
push de
@ -542,20 +542,10 @@ CFETCH:
push hl
jp next
.db "LIT@"
.fill 3
.dw CFETCH
.db 0
LITFETCH:
.dw nativeWord
call readLITTOS
push hl
jp next
; ( a -- )
.db "DROP"
.fill 3
.dw LITFETCH
.dw CFETCH
.db 0
DROP:
.dw nativeWord
@ -818,6 +808,19 @@ FBR:
pop de
jp next
LATEST:
.dw FBR
.db "(bbr)"
.fill 2
.dw FBR
.db 0
BBR:
.dw nativeWord
ld hl, (IP)
ld d, 0
ld e, (hl)
or a ; clear carry
sbc hl, de
ld (IP), hl
jp next
LATEST:
.dw BBR

View File

@ -42,22 +42,30 @@ directly, but as part of another word.
ALLOT n -- Move HERE by n bytes
C, b -- Write byte b in HERE and advance it.
CREATE x -- Create cell named x. Doesn't allocate a PF.
COMPILE x -- Compile word x and write it to HERE
CONSTANT x n -- Creates cell x that when called pushes its value
DOES> -- See description at top of file
IMMEDIATE -- Flag the latest defined word as immediate.
LITERAL n -- *I* Inserts number from TOS as a literal
LITN n -- *I* Inserts number from TOS as a literal
VARIABLE c -- Creates cell x with 2 bytes allocation.
*** Flow ***
Note about flow words: flow words can only be used in definitions. In the
INTERPRET loop, they don't have the desired effect because each word from the
input stream is executed immediately. In this context, branching doesn't work.
(fbr) -- Branches forward by the number specified in its
atom's cell.
COMPILE a -- Compile string word at addr a and spit it to HERE.
(bbr) -- Branches backward by the number specified in its
atom's cell.
AGAIN I:a -- *I* Jump backwards to preceeding BEGIN.
BEGIN -- I:a *I* Marker for backward branching with AGAIN.
ELSE I:a -- *I* Compiles a (fbr) and set branching cell at a.
EXECUTE a -- Execute wordref at addr a
IF -- I:a *I* Compiles a (fbr?) and pushes its cell's addr
INTERPRET -- Get a line from stdin, compile it in tmp memory,
then execute the compiled contents.
QUIT R:drop -- Return to interpreter promp immediately
QUIT R:drop -- Return to interpreter prompt immediately
RECURSE R:I -- R:I-2 Run the current word again.
SKIP? f -- If f is true, skip the execution of the next atom.
Use this right before ";" and you're gonna have a
@ -108,7 +116,7 @@ CMP n1 n2 -- n Compare n1 and n2 and set n to -1, 0, or 1.
NOT f -- f Push the logical opposite of f
*** Strings ***
LIT@ x -- a Read following LIT and push its addr to a
LITS x -- a Read following LIT and push its addr to a
SCMP a1 a2 -- n Compare strings a1 and a2. See CMP
*** I/O ***

View File

@ -19,18 +19,14 @@
.equ INITIAL_SP FORTH_RAMSTART
; wordref of the last entry of the dict.
.equ CURRENT @+2
; Pointer to the next free byte in dict. During compilation of input text, this
; temporarily points to the next free byte in COMPBUF.
; Pointer to the next free byte in dict.
.equ HERE @+2
; Used to hold HERE while we temporarily point it to COMPBUF
.equ OLDHERE @+2
; Interpreter pointer. See Execution model comment below.
.equ IP @+2
; Pointer to where we currently are in the interpretation of the current line.
.equ INPUTPOS @+2
; Buffer where we compile the current input line. Same size as STDIO_BUFSIZE.
.equ COMPBUF @+2
.equ FORTH_RAMEND @+0x40
.equ FORTH_RAMEND @+2
; (HERE) usually starts at RAMEND, but in certain situations, such as in stage0,
; (HERE) will begin at a strategic place.
@ -41,30 +37,11 @@
; a general rule, we go like this:
;
; 1. read single word from line
; 2. compile word to atom
; 3. if immediate, execute atom
; 4. goto 1 until we exhaust words
; 5. Execute compiled atom list as if it was a regular compiledWord.
;
; Because the Parameter Stack uses SP, we can't just go around calling routines:
; This messes with the PS. This is why we almost always jump (unless our call
; doesn't involve Forth words in any way).
;
; This presents a challenge for our interpret loop because step 4, "goto 1"
; isn't obvious. To be able to do that, we must push a "return routine" to the
; Return Stack before step 3.
;
; HERE and IMMEDIATE: When compiling in step 2, we spit compiled atoms in
; (HERE) to simplify "," semantic in Forth (spitting, in all cases, is done in
; (HERE)). However, suring input line compilation, it isn't like during ":", we
; aren't creating a new entry.
;
; Compiling and executing from (HERE) would be dangerous because an
; entry-creation word, during runtime, could end up overwriting the atom list
; we're executing. This is why we have this list in COMPBUF.
;
; During IMMEDIATE mode, (HERE) is temporarily set to COMPBUF, and when we're
; done, we restore (HERE) for runtime. This way, everyone is happy.
; 2. Can we find the word in dict?
; 3. If yes, execute that word, goto 1
; 4. Is it a number?
; 5. If yes, push that number to PS, goto 1
; 6. Error: undefined word.
;
; EXECUTING A WORD
;
@ -113,54 +90,45 @@ forthRdLine:
forthRdLineNoOk:
; Setup return stack. After INTERPRET, we run forthExecLine
ld ix, RS_ADDR
; We're about to compile the line and possibly execute IMMEDIATE words.
; Let's save current (HERE) and temporarily set it to COMPBUF.
ld hl, (HERE)
ld (OLDHERE), hl
ld hl, COMPBUF
ld (HERE), hl
ld hl, .retRef
ld (IP), hl
ld hl, INTERPRET
ld hl, MAINLOOP
push hl
jp EXECUTE+2
.retRef:
.dw $+2
.dw forthExecLine
forthExecLine:
ld de, QUIT
ld hl, (HERE)
call DEinHL
ld (HERE), hl
; Compilation done, let's restore (HERE) and execute!
ld hl, (OLDHERE)
ld (HERE), hl
ld iy, COMPBUF
; before we execute, let's play with our RS a bit: compiledWord is
; going to push (IP) on the RS, but we don't expect our compiled words
; to ever return: it ends with QUIT. Let's set (IP) to ABORTREF and
; IX to RS_ADDR-2 so that compiledWord re-pushes our safety net.
ld hl, ABORTREF
ld (IP), hl
ld ix, RS_ADDR-2
jp compiledWord
; (we don't have RECURSE here. Calling interpret makes us needlessly use our
; RS stack, but it can take it, can't it? )
; WORD COMPILE IN> @ C@ (to check if null) SKIP? (skip if not null) EXIT INTERPRET
.db 0b10 ; UNWORD
INTERPRET:
.dw nativeWord
pop hl ; from WORD
ld a, (hl) ; special case: empty
or a
jp z, next
call find
jr nz, .maybeNum
; regular word
push de
jp EXECUTE+2
.maybeNum:
push hl ; --> lvl 1. save string addr
call parseLiteral
pop hl ; <-- lvl 1
jr nz, .undef
; a valid number in DE!
push de
jp next
.undef:
call printstr
jp abortUnknownWord
.db 0b10 ; UNWORD
MAINLOOP:
.dw compiledWord
.dw WORD
.dw COMPILE
.dw INTERPRET
.dw INP
.dw FETCH
.dw CFETCH
.dw CSKIP
.dw EXIT
.dw INTERPRET
.dw EXIT
.dw QUIT
.dw MAINLOOP
msgOk:
.db " ok", 0

View File

@ -74,6 +74,10 @@ HLPointsBR:
push de
ld de, FBR
call HLPointsDE
jr z, .end
ld de, BBR
call HLPointsDE
.end:
pop de
ret
@ -105,121 +109,6 @@ compSkip:
inc hl \ inc hl
ret
; ***readLIT***
; The goal of this routine is to read a string literal following the currently
; executed words. For example, CREATE and DEFINE need this. Things are a little
; twisted, so bear with me while I explain how it works.
;
; When we call this routine, everything has been compiled. We're on an atom and
; we're executing it. Now, we're looking for a string literal or a word-with-a
; name that follows our readLIT caller. We could think that this word is
; right there on RS' TOS, but not always! You have to account for words wrapping
; the caller. For example, "VARIABLE" calls "CREATE". If you call
; "VARIABLE foo", if CREATE looks at what follows in RS' TOS, it will only find
; the "2" in "CREATE 2 ALLOT".
;
; In this case, we actually need to check in RS' *bottom of stack* for our
; answer. If that atom is a LIT, we're good. We make HL point to it and advance
; IP to byte following null-termination.
;
; If it isn't, things get interesting: If it's a word reference, then it's
; not an invalid literal. For example, one could want to redefine an existing
; word. So in that case, we'll copy the word's name on the pad (it might not be
; null-terminated) and set HL to point to it.
; How do we know that our reference is a word reference (it could be, for
; example, a NUMBER reference)? We check that its address is more than QUIT, the
; second word in our dict. We don't accept EXIT because it's the termination
; word. Yeah, it means that ";" can't be overridden...
; If name can't be read, we abort
;
; BOS vs TOS: What we cover so far is the "CREATE" and friends cases, where we
; want to read BOS. There are, however, cases where we want to read TOS, that is
; that we want to read the LIT right next to our atom. Example: "(". When
; processing comments, we are at compile time and want to read words from BOS,
; yes), however, in "("'s definition, there's "LIT@ )", which means "fetch LIT
; next to me and push this to stack". This LIT we want to fetch is *not* from
; BOS, it's from TOS.
;
; This is why we have readLITBOS and readLITTOS. readLIT uses HL and DE and is
; not used directly.
; Given a RS stack pointer HL, read LIT next to it (or abort) and set HL to
; point to its associated string. Set DE to there the RS stack pointer should
; point next.
readLIT:
call HLPointsLIT
jr nz, .notLIT
; RS BOS is a LIT, make HL point to string, then skip this RS compword.
inc hl \ inc hl ; HL now points to string itself
; HL has our its final value
ld d, h
ld e, l
call strskip
inc hl ; byte after word termination
ex de, hl
ret
.notLIT:
; Alright, not a literal, but is it a word?
call HLPointsUNWORD
jr z, .notWord
; Not a number, then it's a word. Copy word to pad and point to it.
push hl ; --> lvl 1. we need it to set DE later
call intoHL
or a ; clear carry
ld de, CODELINK_OFFSET
sbc hl, de
; That's our return value
push hl ; --> lvl 2
; HL now points to word offset, let'd copy it to pad
ex de, hl
call pad
ex de, hl
ld bc, NAMELEN
ldir
; null-terminate
xor a
ld (de), a
pop hl ; <-- lvl 2
pop de ; <-- lvl 1
; Advance IP by 2
inc de \ inc de
ret
.notWord:
ld hl, .msg
call printstr
jp abort
.msg:
.db "word expected", 0
readLITBOS:
; Before we start: is our RS empty? If IX == RS_ADDR, it is (it only has
; its safety net). When that happens, we actually want to run readLITTOS
push hl
push de
push ix \ pop hl
ld de, RS_ADDR
or a ; clear carry
sbc hl, de
pop de
pop hl
jr z, readLITTOS
push de
; Our bottom-of-stack is RS_ADDR+2 because RS_ADDR is occupied by our
; ABORTREF safety net.
ld hl, (RS_ADDR+2)
call readLIT
ld (RS_ADDR+2), de
pop de
ret
readLITTOS:
push de
ld hl, (IP)
call readLIT
ld (IP), de
pop de
ret
; Find the entry corresponding to word where (HL) points to and sets DE to
; point to that entry.
; Z if found, NZ if not.
@ -271,7 +160,7 @@ wrCompHL:
; Spit name + prev in (HERE) and adjust (HERE) and (CURRENT)
; HL points to new (HERE)
entryhead:
call readLITBOS
call readword
ld de, (HERE)
call strcpy
ex de, hl ; (HERE) now in HL
@ -343,4 +232,3 @@ fetchline:
call stdioReadLine
ld (INPUTPOS), hl
ret