diff --git a/emul/forth/stage.c b/emul/forth/stage.c
index 4febf68..060d720 100644
--- a/emul/forth/stage.c
+++ b/emul/forth/stage.c
@@ -29,7 +29,7 @@ trouble of compiling defs to binary.
 
 //#define DEBUG
 // in sync with glue.asm
-#define RAMSTART 0x890
+#define RAMSTART 0x850
 #define STDIO_PORT 0x00
 // To know which part of RAM to dump, we listen to port 2, which at the end of
 // its compilation process, spits its HERE addr to port 2 (MSB first)
diff --git a/emul/forth/z80c.bin b/emul/forth/z80c.bin
index 5bfeeed..4d5549f 100644
Binary files a/emul/forth/z80c.bin and b/emul/forth/z80c.bin differ
diff --git a/forth/forth.asm b/forth/forth.asm
index 0681b69..9982687 100644
--- a/forth/forth.asm
+++ b/forth/forth.asm
@@ -1,34 +1,5 @@
-; Collapse OS' Forth
-;
-; Unlike other assembler parts of Collapse OS, this unit is one huge file.
-;
-; I do this because as Forth takes a bigger place, assembler is bound to take
-; less and less place. I am thus consolidating that assembler code in one
-; place so that I have a better visibility of what to minimize.
-;
-; I also want to reduce the featureset of the assembler so that Collapse OS
-; self-hosts in a more compact manner. File include is a big part of the
-; complexity in zasm. If we can get rid of it, we'll be more compact.
+; Collapse OS Forth's boot binary
 
-; *** ABI STABILITY ***
-;
-; This unit needs to have some of its entry points stay at a stable offset.
-; These have a comment over them indicating the expected offset. These should
-; not move until the Grand Bootstrapping operation has been completed.
-;
-; When you see random ".fill" here and there, it's to ensure that stability.
-
-; *** Defines ***
-; GETC: address of a GetC routine
-; PUTC: address of a PutC routine
-;
-; Those GetC/PutC routines are hooked through defines and have this API:
-;
-; GetC: Blocks until a character is read from the device and return that
-;       character in A.
-;
-; PutC: Write character specified in A onto the device.
-;
 ; *** Const ***
 ; Base of the Return Stack
 .equ	RS_ADDR		0xf000
@@ -72,39 +43,12 @@
 ; (HERE) will begin at a strategic place.
 .equ	HERE_INITIAL	RAMEND
 
-; EXECUTION MODEL
-; After having read a line through readline, we want to interpret it. As
-; a general rule, we go like this:
-;
-; 1. read single word from line
-; 2. Can we find the word in dict?
-; 3. If yes, execute that word, goto 1
-; 4. Is it a number?
-; 5. If yes, push that number to PS, goto 1
-; 6. Error: undefined word.
-;
-; EXECUTING A WORD
-;
-; At it's core, executing a word is having the wordref in IY and call
-; EXECUTE. Then, we let the word do its things. Some words are special,
-; but most of them are of the compiledWord type, and that's their execution that
-; we describe here.
-;
-; First of all, at all time during execution, the Interpreter Pointer (IP)
-; points to the wordref we're executing next.
-;
-; When we execute a compiledWord, the first thing we do is push IP to the Return
-; Stack (RS). Therefore, RS' top of stack will contain a wordref to execute
-; next, after we EXIT.
-;
-; At the end of every compiledWord is an EXIT. This pops RS, sets IP to it, and
-; continues.
-
 ; *** Stable ABI ***
 ; Those jumps below are supposed to stay at these offsets, always. If they
 ; change bootstrap binaries have to be adjusted because they rely on them.
 ; Those entries are referenced directly by their offset in Forth code with a
 ; comment indicating what that number refers to.
+;
 ; We're at 0 here
 	jp	forthMain
 ; 3
@@ -138,7 +82,86 @@
 	jp	parseDecimal
 	jp	doesWord
 
-; *** Code ***
+; *** Boot dict ***
+; There are only 5 words in the boot dict, but these words' offset need to be
+; stable, so they're part of the "stable ABI"
+
+; Pop previous IP from Return stack and execute it.
+; ( R:I -- )
+	.db	"EXIT"
+	.dw	0
+	.db	4
+EXIT:
+	.dw nativeWord
+	call	popRSIP
+	jp	next
+
+	.db	"(br)"
+	.dw	$-EXIT
+	.db	4
+BR:
+	.dw	nativeWord
+	ld	hl, (IP)
+	ld	e, (hl)
+	inc	hl
+	ld	d, (hl)
+	dec	hl
+	add	hl, de
+	ld	(IP), hl
+	jp	next
+
+	.db	"(?br)"
+	.dw	$-BR
+	.db	5
+CBR:
+	.dw	nativeWord
+	pop	hl
+	call	chkPS
+	ld	a, h
+	or	l
+	jr	z, BR+2		; False, branch
+	; True, skip next 2 bytes and don't branch
+	ld	hl, (IP)
+	inc	hl
+	inc	hl
+	ld	(IP), hl
+	jp	next
+
+	.db	","
+	.dw	$-CBR
+	.db	1
+WR:
+	.dw	nativeWord
+	pop	de
+	call	chkPS
+	ld	hl, (HERE)
+	ld	(hl), e
+	inc	hl
+	ld	(hl), d
+	inc	hl
+	ld	(HERE), hl
+	jp	next
+
+; ( addr -- )
+	.db "EXECUTE"
+	.dw $-WR
+	.db 7
+EXECUTE:
+	.dw nativeWord
+	pop	iy	; is a wordref
+	call	chkPS
+	ld	l, (iy)
+	ld	h, (iy+1)
+	; HL points to code pointer
+	inc	iy
+	inc	iy
+	; IY points to PFA
+	jp	(hl)	; go!
+
+; Offset: 00b8
+.out $
+; *** End of stable ABI ***
+
 forthMain:
 	; STACK OVERFLOW PROTECTION:
 	; To avoid having to check for stack underflow after each pop operation
@@ -167,11 +190,6 @@ forthMain:
 .bootName:
 	.db	"BOOT", 0
 
-.fill 95
-
-; STABLE ABI
-; Offset: 00cd
-.out $
 ; copy (HL) into DE, then exchange the two, utilising the optimised HL instructions.
 ; ld must be done little endian, so least significant byte first.
 intoHL:
@@ -183,32 +201,6 @@ intoHL:
 	pop 	de
 	ret
 
-; add the value of A into HL
-; affects carry flag according to the 16-bit addition, Z, S and P untouched.
-addHL:
-	push	de
-	ld 	d, 0
-	ld	e, a
-	add	hl, de
-	pop	de
-	ret
-
-; Copy string from (HL) in (DE), that is, copy bytes until a null char is
-; encountered. The null char is also copied.
-; HL and DE point to the char right after the null char.
-; B indicates the length of the copied string, including null-termination.
-strcpy:
-	ld	b, 0
-.loop:
-	ld	a, (hl)
-	ld	(de), a
-	inc	hl
-	inc	de
-	inc	b
-	or	a
-	jr	nz, .loop
-	ret
-
 ; Compares strings pointed to by HL and DE until one of them hits its null char.
 ; If equal, Z is set. If not equal, Z is reset. C is set if HL > DE
 strcmp:
@@ -327,7 +319,6 @@ parseDecimal:
 	xor	a	; set Z
 	ret
 
-; *** Support routines ***
 ; Find the entry corresponding to word where (HL) points to and sets DE to
 ; point to that entry.
 ; Z if found, NZ if not.
@@ -420,26 +411,6 @@ flagsToBC:
 	dec	bc
 	ret
 
-; Write DE in (HL), advancing HL by 2.
-DEinHL:
-	ld	(hl), e
-	inc	hl
-	ld	(hl), d
-	inc	hl
-	ret
-
-; *** Stack management ***
-; The Parameter stack (PS) is maintained by SP and the Return stack (RS) is
-; maintained by IX. This allows us to generally use push and pop freely because
-; PS is the most frequently used. However, this causes a problem with routine
-; calls: because in Forth, the stack isn't balanced within each call, our return
-; offset, when placed by a CALL, messes everything up. This is one of the
-; reasons why we need stack management routines below. IX always points to RS'
-; Top Of Stack (TOS)
-;
-; This return stack contain "Interpreter pointers", that is a pointer to the
-; address of a word, as seen in a compiled list of words.
-
 ; Push value HL to RS
 pushRS:
 	inc	ix
@@ -485,30 +456,13 @@ chkPS:
 	ret	nc		; (INITIAL_SP) >= SP? good
 	jp	abortUnderflow
 
-; *** Dictionary ***
-; It's important that this part is at the end of the resulting binary.
-; A dictionary entry has this structure:
-; - Xb name. Arbitrary long number of character (but can't be bigger than
-;   input buffer, of course). not null-terminated
-; - 2b prev offset
-; - 1b size + IMMEDIATE flag
-; - 2b code pointer
-; - Parameter field (PF)
-;
-; The prev offset is the number of bytes between the prev field and the
-; previous word's code pointer.
-;
-; The size + flag indicate the size of the name field, with the 7th bit
-; being the IMMEDIATE flag.
-;
-; The code pointer point to "word routines". These routines expect to be called
-; with IY pointing to the PF. They themselves are expected to end by jumping
-; to the address at (IP). They will usually do so with "jp next".
-;
-; That's for "regular" words (words that are part of the dict chain). There are
-; also "special words", for example NUMBER, LIT, FBR, that have a slightly
-; different structure. They're also a pointer to an executable, but as for the
-; other fields, the only one they have is the "flags" field.
+abortUnderflow:
+	ld	hl, .name
+	call	find
+	push	de
+	jp	EXECUTE+2
+.name:
+	.db "(uflw)", 0
 
 ; This routine is jumped to at the end of every word. In it, we jump to current
 ; IP, but we also take care of increasing it my 2 before jumping
@@ -529,6 +483,8 @@ next:
 	jp	EXECUTE+2
 
 
+; *** Word routines ***
+
 ; Execute a word containing native code at its PF address (PFA)
 nativeWord:
 	jp	(iy)
@@ -599,99 +555,16 @@ litWord:
 	ld	(IP), hl
 	jp	next
 
-; Pop previous IP from Return stack and execute it.
-; ( R:I -- )
-	.db	"EXIT"
-	.dw	0
-	.db	4
-EXIT:
-	.dw nativeWord
-	call	popRSIP
-	jp	next
-
-.fill 30
-
-abortUnderflow:
-	ld	hl, .name
-	call	find
-	push	de
-	jp	EXECUTE+2
-.name:
-	.db "(uflw)", 0
-
-	.db	"(br)"
-	.dw	$-EXIT
-	.db	4
-BR:
-	.dw	nativeWord
-	ld	hl, (IP)
-	ld	e, (hl)
-	inc	hl
-	ld	d, (hl)
-	dec	hl
-	add	hl, de
-	ld	(IP), hl
-	jp	next
-
-.fill 72
-
-	.db	"(?br)"
-	.dw	$-BR
-	.db	5
-CBR:
-	.dw	nativeWord
-	pop	hl
-	call	chkPS
-	ld	a, h
-	or	l
-	jp	z, BR+2		; False, branch
-	; True, skip next 2 bytes and don't branch
-	ld	hl, (IP)
-	inc	hl
-	inc	hl
-	ld	(IP), hl
-	jp	next
-
-.fill 15
-
-	.db	","
-	.dw	$-CBR
-	.db	1
-WR:
-	.dw	nativeWord
-	pop	de
-	call	chkPS
-	ld	hl, (HERE)
-	call	DEinHL
-	ld	(HERE), hl
-	jp	next
-
-.fill 100
-
-; ( addr -- )
-	.db "EXECUTE"
-	.dw $-WR
-	.db 7
-; STABLE ABI
-; Offset: 0388
-.out $
-EXECUTE:
-	.dw nativeWord
-	pop	iy	; is a wordref
-	call	chkPS
-	ld	l, (iy)
-	ld	h, (iy+1)
-	; HL points to code pointer
-	inc	iy
-	inc	iy
-	; IY points to PFA
-	jp	(hl)	; go!
-
-
-.fill 677
+; *** Dict hook ***
+; This dummy dictionary entry serves two purposes:
+; 1. Allow binary grafting. Because each binary dict always end with a dummy
+;    entry, we always have a predictable prev offset for the grafter's first
+;    entry.
+; 2. Tell icore's "_c" routine where the boot binary ends. See comment there.
 
 	.db	"_bend"
 	.dw	$-EXECUTE
 	.db	5
-; Offset: 0647
+
+; Offset: 0253
 .out $
diff --git a/forth/notes.txt b/forth/notes.txt
new file mode 100644
index 0000000..02ced83
--- /dev/null
+++ b/forth/notes.txt
@@ -0,0 +1,68 @@
+Collapse OS' Forth implementation notes
+
+*** EXECUTION MODEL
+
+After having read a line through readln, we want to interpret it. As a general
+rule, we go like this:
+
+1. read single word from line
+2. Can we find the word in dict?
+3. If yes, execute that word, goto 1
+4. Is it a number?
+5. If yes, push that number to PS, goto 1
+6. Error: undefined word.
+
+*** EXECUTING A WORD
+
+At it's core, executing a word is pushing the wordref on PS and calling EXECUTE.
+Then, we let the word do its things. Some words are special, but most of them
+are of the compiledWord type, and that's their execution that we describe here.
+
+First of all, at all time during execution, the Interpreter Pointer (IP) points
+to the wordref we're executing next.
+
+When we execute a compiledWord, the first thing we do is push IP to the Return
+Stack (RS). Therefore, RS' top of stack will contain a wordref to execute next,
+after we EXIT.
+
+At the end of every compiledWord is an EXIT. This pops RS, sets IP to it, and
+continues.
+
+*** Stack management
+
+The Parameter stack (PS) is maintained by SP and the Return stack (RS) is
+maintained by IX. This allows us to generally use push and pop freely because PS
+is the most frequently used. However, this causes a problem with routine calls:
+because in Forth, the stack isn't balanced within each call, our return offset,
+when placed by a CALL, messes everything up. This is one of the reasons why we
+need stack management routines below. IX always points to RS' Top Of Stack (TOS)
+
+This return stack contain "Interpreter pointers", that is a pointer to the
+address of a word, as seen in a compiled list of words.
+
+*** Dictionary
+
+A dictionary entry has this structure:
+
+- Xb name. Arbitrary long number of character (but can't be bigger than
+  input buffer, of course). not null-terminated
+- 2b prev offset
+- 1b size + IMMEDIATE flag
+- 2b code pointer
+- Parameter field (PF)
+
+The prev offset is the number of bytes between the prev field and the previous
+word's code pointer.
+
+The size + flag indicate the size of the name field, with the 7th bit being the
+IMMEDIATE flag.
+
+The code pointer point to "word routines". These routines expect to be called
+with IY pointing to the PF. They themselves are expected to end by jumping to
+the address at (IP). They will usually do so with "jp next".
+
+That's for "regular" words (words that are part of the dict chain). There are
+also "special words", for example NUMBER, LIT, FBR, that have a slightly
+different structure. They're also a pointer to an executable, but as for the
+other fields, the only one they have is the "flags" field.
+
diff --git a/forth/z80a.fs b/forth/z80a.fs
index 5aab069..b97be70 100644
--- a/forth/z80a.fs
+++ b/forth/z80a.fs
@@ -39,6 +39,7 @@
 : OP1 CREATE C, DOES> C@ A, ;
 0xeb OP1 EXDEHL,
 0x76 OP1 HALT,
+0xe9 OP1 JP(HL),
 0x12 OP1 LD(DE)A,
 0x1a OP1 LDA(DE),
 0xc9 OP1 RET,