lib/expr: make recursion process a bit more orderly

Instead of going left and right, finding operators chars and replacing them
with nulls, we parse expressions in a more orderly manner, one chunk at a
time. I think it qualifies as "recursive descent", but I'm not sure.

This allows us to preserve the string we parse and should also make the
implementation of parens much easier.
This commit is contained in:
Virgil Dupras 2019-12-29 11:42:18 -05:00
parent 7410891ad1
commit 213614af33
4 changed files with 270 additions and 159 deletions

View File

@ -1,7 +1,5 @@
; *** Requirements ***
; findchar
; multDEBC
; callIXI
; ari
;
; *** Defines ***
;
@ -11,167 +9,132 @@
;
; *** Code ***
;
; Parse expression in string at (HL) and returns the result in DE.
; **This routine mutates (HL).**
; We expect (HL) to be disposable: we mutate it to avoid having to make a copy.
; This routine needs to be able to mutate (HL), but it takes care of restoring
; the string to its original value before returning.
; Sets Z on success, unset on error.
parseExpr:
push iy
push ix
push hl
call _parseExpr
push ix \ pop de
call _parseAddSubst
pop hl
pop ix
pop iy
ret
_parseExpr:
ld de, exprTbl
.loop:
ld a, (de)
or a
jp z, EXPR_PARSE ; no operator, just parse the literal
push de ; --> lvl 1. save operator row
call _findAndSplit
jr z, .found
pop de ; <-- lvl 1
inc de \ inc de \ inc de
jr .loop
.found:
; Operator found, string splitted. Left in (HL), right in (DE)
call _resolveLeftAndRight
; Whether _resolveLeftAndRight was a success, we pop our lvl 1 stack
; out, which contains our operator row. We pop it in IX.
; L-R numbers are parsed in HL (left) and DE (right).
pop ix ; <-- lvl 1
; *** Op signature ***
; The signature of "operators routines" (.plus, .mult, etc) below is this:
; Combine HL and DE with an operator (+, -, *, etc) and put the result in DE.
; Destroys HL and A. Never fails. Yes, that's a problem for division by zero.
; Don't divide by zero. All other registers are protected.
; Given a running result in DE, a rest-of-expression in (HL), a parse routine
; in IY and an apply "operator routine" in IX, (HL/DE --> DE)
; With that, parse the rest of (HL) and apply the operation on it, then place
; HL at the end of the parsed string, with A containing the last char of it,
; which can be either an operator or a null char.
; Z for success.
;
_parseApply:
push de ; --> lvl 1, left result
push ix ; --> lvl 2, routine to apply
inc hl ; after op char
call callIY ; --> DE
pop ix ; <-- lvl 2, routine to apply
; Here we do some stack kung fu. We have, in HL, a string pointer we
; want to keep. We have, in (SP), our left result we want to use.
ex (sp), hl ; <-> lvl 1
ret nz
; Resolving left and right succeeded, proceed!
inc ix ; point to routine pointer
call callIXI
push de \ pop ix
cp a ; ensure Z
push af ; --> lvl 2, save ending operator
call callIX
pop af ; <-- lvl 2, restore operator.
pop hl ; <-- lvl 1, restore str pointer
ret
; Given a string in (HL) and a separator char in A, return a splitted string,
; that is, the same (HL) string but with the found A char replaced by a null
; char. DE points to the second part of the split.
; Sets Z if found, unset if not found.
_findAndSplit:
push hl
call .skipCharLiteral
call findchar
jr nz, .end ; nothing found
; Alright, we have our char and we're pointing at it. Let's replace it
; with a null char.
xor a
ld (hl), a ; + changed to \0
inc hl
ex de, hl ; DE now points to the second part of the split
cp a ; ensure Z
.end:
pop hl ; HL is back to the start
ret
.skipCharLiteral:
; special case: if our first char is ', skip the first 3 characters
; so that we don't mistake a literal for an iterator
push af
ld a, (hl)
cp 0x27 ; '
jr nz, .skipCharLiteralEnd ; not a '
xor a ; check for null char during skipping
; skip 3
inc hl
cp (hl)
jr z, .skipCharLiteralEnd
inc hl
cp (hl)
jr z, .skipCharLiteralEnd
inc hl
.skipCharLiteralEnd:
pop af
ret
.find:
; parse expression on the left (HL) and the right (DE) and put the results in
; HL (left) and DE (right)
_resolveLeftAndRight:
ld a, (hl)
; Unless there's an error, this routine completely resolves any valid expression
; from (HL) and puts the result in DE.
; Destroys HL
; Z for success.
_parseAddSubst:
call _parseMultDiv
ret nz
.loop:
; do we have an operator?
or a
jr z, .noleft
; Parse left operand in (HL)
push de ; --> lvl 1
call parseExpr
pop hl ; <-- lvl 1, orig DE
ret nz ; return immediately if error
.parseright:
; Now we have parsed everything to the left and we have its result in
; DE. What we need to do now is the same thing on (DE) and then apply
; the + operator. Let's save DE somewhere and parse this.
push de ; --> lvl 1
; right expr in (HL)
call parseExpr ; DE is set
pop hl ; <-- lvl 1. left value
ret ; Z is parseExpr's result
.noleft:
; special case: is (HL) zero? If yes, it means that our left operand
; is empty. consider it as 0
ex de, hl ; (DE) goes in (HL) for .parseright
ld de, 0
jr .parseright
; Routines in here all have the same signature: they take two numbers, DE (left)
; and IX (right), apply the operator and put the resulting number in DE.
; The table has 3 bytes per row: 1 byte for operator and 2 bytes for routine
; pointer.
exprTbl:
.db '+'
.dw .plus
.db '-'
.dw .minus
.db '*'
.dw .mult
.db '/'
.dw .div
.db '%'
.dw .mod
.db '&'
.dw .and
.db 0x7c ; '|'
.dw .or
.db '^'
.dw .xor
.db '}'
.dw .rshift
.db '{'
.dw .lshift
.db 0 ; end of table
ret z ; null char, we're done
; We have an operator. Resolve the rest of the expr then apply it.
ld ix, .plus
cp '+'
jr z, .found
ld ix, .minus
cp '-'
ret nz ; unknown char, error
.found:
ld iy, _parseMultDiv
call _parseApply
ret nz
jr .loop
.plus:
add hl, de
ex de, hl
ret
.minus:
or a ; clear carry
or a ; clear carry
sbc hl, de
ex de, hl
ret
; Parse (HL) as far as it can, that is, resolving expressions at its level or
; lower (anything but + and -).
; A is set to the last op it encountered. Unless there's an error, this can only
; be +, - or null. Null if we're done parsing, + and - if there's still work to
; do.
; (HL) points to last op encountered.
; DE is set to the numerical value of everything that was parsed left of (HL).
_parseMultDiv:
call _parseBitShift
ret nz
.loop:
; do we have an operator?
or a
ret z ; null char, we're done
; We have an operator. Resolve the rest of the expr then apply it.
ld ix, .mult
cp '*'
jr z, .found
ld ix, .div
cp '/'
jr z, .found
ld ix, .mod
cp '%'
jr z, .found
; might not be an error, return success
cp a
ret
.found:
ld iy, _parseBitShift
call _parseApply
ret nz
jr .loop
.mult:
push bc ; --> lvl 1
ld b, h
ld c, l
call multDEBC ; --> HL
pop bc ; <-- lvl 1
ex de, hl
ret
.div:
; divide takes HL/DE
push bc
ld a, l
push bc ; --> lvl 1
call divide
ld e, c
ld d, b
pop bc
pop bc ; <-- lvl 1
ret
.mod:
@ -179,6 +142,39 @@ exprTbl:
ex de, hl
ret
; Same as _parseMultDiv, but a layer lower.
_parseBitShift:
call _parseNumber
ret nz
.loop:
; do we have an operator?
or a
ret z ; null char, we're done
; We have an operator. Resolve the rest of the expr then apply it.
ld ix, .and
cp '&'
jr z, .found
ld ix, .or
cp 0x7c ; '|'
jr z, .found
ld ix, .xor
cp '^'
jr z, .found
ld ix, .rshift
cp '}'
jr z, .found
ld ix, .lshift
cp '{'
jr z, .found
; might not be an error, return success
cp a
ret
.found:
ld iy, _parseNumber
call _parseApply
ret nz
jr .loop
.and:
ld a, h
and d
@ -209,26 +205,130 @@ exprTbl:
ld a, e
and 0xf
ret z
push bc
push bc ; --> lvl 1
ld b, a
.rshiftLoop:
srl h
rr l
djnz .rshiftLoop
ex de, hl
pop bc
pop bc ; <-- lvl 1
ret
.lshift:
ld a, e
and 0xf
ret z
push bc
push bc ; --> lvl 1
ld b, a
.lshiftLoop:
sla l
rl h
djnz .lshiftLoop
ex de, hl
pop bc
pop bc ; <-- lvl 1
ret
; Parse first number of expression at (HL). A valid number is anything that can
; be parsed by EXPR_PARSE and is followed either by a null char or by any of the
; operator chars. This routines takes care of replacing an operator char with
; the null char before calling EXPR_PARSE and then replace the operator back
; afterwards.
; HL is moved to the char following the number having been parsed.
; DE contains the numerical result.
; A contains the operator char following the number (or null). Only on success.
; Z for success.
_parseNumber:
; Special case 1: number starts with '-'
ld a, (hl)
cp '-'
jr nz, .skip1
; We have a negative number. Parse normally, then subst from zero
inc hl
call _parseNumber
push hl ; --> lvl 1
ex af, af' ; preserve flags
or a ; clear carry
ld hl, 0
sbc hl, de
ex de, hl
ex af, af' ; restore flags
pop hl ; <-- lvl 1
ret
.skip1:
; End of special case 1
push ix
; Copy beginning of string to DE, we'll need it later
ld d, h
ld e, l
; Special case 2: we have a char literal. If we have a char literal, we
; don't want to go through the "_isOp" loop below because if that char
; is one of our operators, we're messing up our processing. So, set
; ourselves 3 chars further and continue from there. EXPR_PARSE will
; take care of validating those 3 chars.
cp 0x27 ; apostrophe (') char
jr nz, .skip2
; "'". advance HL by 3
inc hl \ inc hl \ inc hl
; End of special case 2
.skip2:
dec hl ; offset "inc-hl-before" in loop
.loop:
inc hl
ld a, (hl)
call _isOp
jr nz, .loop
; (HL) and A is an op or a null
push af ; --> lvl 1 save op
push hl ; --> lvl 2 save end of string
; temporarily put a null char instead of the op
xor a
ld (hl), a
ex de, hl ; rewind to beginning of number
call EXPR_PARSE
ex af, af' ; keep result flags away while we restore (HL)
push ix \ pop de ; result in DE
pop hl ; <-- lvl 2, end of string
pop af ; <-- lvl 1, saved op
ld (hl), a
ex af, af' ; restore Z from EXPR_PARSE
jr nz, .end
; HL is currently at the end of the number's string
; On success, have A be the operator char following the number
ex af, af'
.end:
pop ix
ret
; Sets Z if A contains a valid operator char or a null char.
_isOp:
or a
ret z
push hl ; --> lvl 1
; Set A' to zero for quick end-of-table checks
ex af, af'
xor a
ex af, af'
ld hl, .exprChars
.loop:
cp (hl)
jr z, .found
ex af, af'
cp (hl)
jr z, .notFound ; end of table
ex af, af'
inc hl ; next char
jr .loop
.notFound:
ex af, af' ; restore orig A
inc a ; unset Z
.found:
; Z already set
pop hl ; <-- lvl 1
ret
.exprChars:
.db "+-*/%&|^{}", 0

View File

@ -51,6 +51,24 @@ assertEQW:
.msg:
.db "HL != DE", CR, LF, 0
; Given a list of pointer to test data structures in HL and a pointer to a test
; routine in IX, call (IX) with HL pointing to the test structure until the list
; points to a zero. See testParseExpr in test_expr for an example usage.
testList:
push hl ; --> lvl 1
call intoHL
ld a, h
or l
jr z, .end
call callIX
call nexttest
pop hl ; <-- lvl 1
inc hl \ inc hl
jr testList
.end:
pop hl ; <-- lvl 1
ret
nexttest:
ld a, (testNum)
inc a

View File

@ -109,35 +109,18 @@ test:
halt
testParseExpr:
ld iy, .t1
call .testEQ
ld iy, .t2
call .testEQ
ld iy, .t3
call .testEQ
ld iy, .t4
call .testEQ
ld iy, .t5
call .testEQ
ld iy, .t6
call .testEQ
ld iy, .t7
call .testEQ
ld iy, .t8
call .testEQ
ld iy, .t9
call .testEQ
ret
ld hl, .alltests
ld ix, .test
jp testList
.testEQ:
push iy \ pop hl
.test:
push hl \ pop iy
inc hl \ inc hl
call parseExpr
call assertZ
ld l, (iy)
ld h, (iy+1)
call assertEQW
jp nexttest
jp assertEQW
.t1:
.dw 7
@ -166,3 +149,13 @@ testParseExpr:
.t9:
.dw 10
.db "2*3+4", 0
; There was this untested regression during the replacement of find-and-subst
; parseExpr to the recursive descent one. It was time consuming to find. Here
; it goes, here it stays.
.t10:
.dw '-'+1
.db "'-'+1", 0
.alltests:
.dw .t1, .t2, .t3, .t4, .t5, .t6, .t7, .t8, .t9, .t10, 0

View File

@ -13,9 +13,9 @@ cmpas() {
echo ok
else
echo actual
echo $ACTUAL
echo "$ACTUAL"
echo expected
echo $EXPECTED
echo "$EXPECTED"
exit 1
fi
}