From 7ca54d179d6a5272f6f50160db855ab9ef72d2d4 Mon Sep 17 00:00:00 2001 From: Virgil Dupras Date: Mon, 30 Dec 2019 19:24:53 -0500 Subject: [PATCH] lib/expr: make EXPR_PARSE "tail" HL Things are now much simpler. --- apps/README.md | 2 -- apps/basic/var.asm | 12 +++---- apps/lib/expr.asm | 72 +++----------------------------------- apps/lib/parse.asm | 78 ++++++++++++++++++++---------------------- apps/zasm/parse.asm | 47 ++++++++++++++----------- apps/zasm/symbol.asm | 77 +++++++++++++++++++++++++++++++++++++++-- apps/zasm/util.asm | 10 ------ tools/tests/unit/test_expr.asm | 12 ++++++- 8 files changed, 159 insertions(+), 151 deletions(-) diff --git a/apps/README.md b/apps/README.md index e227fca..6be1999 100644 --- a/apps/README.md +++ b/apps/README.md @@ -74,8 +74,6 @@ There are decimal, hexadecimal and binary literals. A "straight" number is parsed as a decimal. Hexadecimal literals must be prefixed with `0x` (`0xf4`). Binary must be prefixed with `0b` (`0b01100110`). -A decimal literal cannot start with `0`, with the exception of the `0` literal. - Decimals and hexadecimal are "flexible". Whether they're written in a byte or a word, you don't need to prefix them with zeroes. Watch out for overflow, however. diff --git a/apps/basic/var.asm b/apps/basic/var.asm index 2ce44c9..8332e78 100644 --- a/apps/basic/var.asm +++ b/apps/basic/var.asm @@ -84,15 +84,12 @@ varAssign: ; Check if value at (HL) is a variable. If yes, returns its associated value. ; Otherwise, jump to parseLiteral. parseLiteralOrVar: - inc hl - ld a, (hl) - dec hl - or a - ; if more than one in length, it can't be a variable - jp nz, parseLiteral + call isLiteralPrefix + jp z, parseLiteral + ; not a literal, try var ld a, (hl) call varChk - jp nz, parseLiteral + ret nz ; It's a variable, resolve! add a, a ; * 2 because each element is a word push hl ; --> lvl 1 @@ -102,5 +99,6 @@ parseLiteralOrVar: inc hl ld d, (hl) pop hl ; <-- lvl 1 + inc hl ; point to char after variable cp a ; ensure Z ret diff --git a/apps/lib/expr.asm b/apps/lib/expr.asm index df0b2b2..df25300 100644 --- a/apps/lib/expr.asm +++ b/apps/lib/expr.asm @@ -6,6 +6,8 @@ ; EXPR_PARSE: routine to call to parse literals or symbols that are part of ; the expression. Routine's signature: ; String in (HL), returns its parsed value to DE. Z for success. +; HL is advanced to the character following the last successfully +; read char. ; ; *** Code *** ; @@ -258,74 +260,8 @@ _parseNumber: ret .skip1: ; End of special case 1 - ; Copy beginning of string to DE, we'll need it later - ld d, h - ld e, l - - ; Special case 2: we have a char literal. If we have a char literal, we - ; don't want to go through the "_isOp" loop below because if that char - ; is one of our operators, we're messing up our processing. So, set - ; ourselves 3 chars further and continue from there. EXPR_PARSE will - ; take care of validating those 3 chars. - cp 0x27 ; apostrophe (') char - jr nz, .skip2 - ; "'". advance HL by 3 - inc hl \ inc hl \ inc hl - ; End of special case 2 -.skip2: - - dec hl ; offset "inc-hl-before" in loop -.loop: - inc hl - ld a, (hl) - call _isOp - jr nz, .loop - ; (HL) and A is an op or a null - push af ; --> lvl 1 save op - push hl ; --> lvl 2 save end of string - ; temporarily put a null char instead of the op - xor a - ld (hl), a - ex de, hl ; rewind to beginning of number call EXPR_PARSE ; --> DE - ex af, af' ; keep result flags away while we restore (HL) - pop hl ; <-- lvl 2, end of string - pop af ; <-- lvl 1, saved op - ld (hl), a - ex af, af' ; restore Z from EXPR_PARSE ret nz - ; HL is currently at the end of the number's string - ; On success, have A be the operator char following the number - ex af, af' + ; Check if (HL) points to null or op + ld a, (hl) ret - -; Sets Z if A contains a valid operator char or a null char. -_isOp: - or a - ret z - push hl ; --> lvl 1 - ; Set A' to zero for quick end-of-table checks - ex af, af' - xor a - ex af, af' - ld hl, .exprChars -.loop: - cp (hl) - jr z, .found - ex af, af' - cp (hl) - jr z, .notFound ; end of table - ex af, af' - inc hl ; next char - jr .loop -.notFound: - ex af, af' ; restore orig A - inc a ; unset Z -.found: - ; Z already set - pop hl ; <-- lvl 1 - ret - -.exprChars: - .db "+-*/%&|^{}", 0 - diff --git a/apps/lib/parse.asm b/apps/lib/parse.asm index 115c4af..2d8c3cb 100644 --- a/apps/lib/parse.asm +++ b/apps/lib/parse.asm @@ -158,6 +158,7 @@ parseBinaryLiteral: ; Parses the string at (HL) and returns the 16-bit value in DE. The string ; can be a decimal literal (1234), a hexadecimal literal (0x1234) or a char ; literal ('X'). +; HL is advanced to the character following the last successfully read char. ; ; As soon as the number doesn't fit 16-bit any more, parsing stops and the ; number is invalid. If the number is valid, Z is set, otherwise, unset. @@ -166,60 +167,57 @@ parseLiteral: ld a, (hl) cp 0x27 ; apostrophe jr z, .char + call isDigit + ret nz cp '0' - jr z, .hexOrBin - push hl - call parseDecimalC - pop hl - ret + jp nz, parseDecimal + ; maybe hex, maybe binary + inc hl + ld a, (hl) + inc hl ; already place it for hex or bin + cp 'x' + jr z, parseHexadecimal + cp 'b' + jr z, parseBinaryLiteral + ; nope, just a regular decimal + dec hl \ dec hl + jp parseDecimal ; Parse string at (HL) and, if it is a char literal, sets Z and return ; corresponding value in E. D is always zero. +; HL is advanced to the character following the last successfully read char. ; ; A valid char literal starts with ', ends with ' and has one character in the ; middle. No escape sequence are accepted, but ''' will return the apostrophe ; character. .char: - push hl inc hl + ld e, (hl) ; our result inc hl cp (hl) - jr nz, .charEnd ; not ending with an apostrophe + jr nz, .charError ; not ending with an apostrophe + ; good char, advance HL and return inc hl - ld a, (hl) - or a ; cp 0 - jr nz, .charEnd ; string has to end there - ; Valid char, good - dec hl - dec hl - ld e, (hl) - cp a ; ensure Z -.charEnd: - pop hl + ; Z already set + ret +.charError: + ; In all error conditions, HL is advanced by 2. Rewind. + dec hl \ dec hl + ; NZ already set ret -.hexOrBin: - inc hl - ld a, (hl) - inc hl ; already place it for hex or bin - cp 'x' - jr z, .hex - cp 'b' - jr z, .bin - ; special case: single '0'. set Z if we hit have null terminating. - or a -.hexOrBinEnd: - dec hl \ dec hl ; replace HL - ret ; Z already set -.hex: - push hl - call parseHexadecimal - pop hl - jr .hexOrBinEnd +; Returns whether A is a literal prefix, that is, a digit or an apostrophe. +isLiteralPrefix: + cp 0x27 ; apostrophe + ret z + ; continue to isDigit -.bin: - push hl - call parseBinaryLiteral - pop hl - jr .hexOrBinEnd +; Returns whether A is a digit +isDigit: + cp '0' + jp c, unsetZ + cp '9'+1 + jp nc, unsetZ + cp a ; ensure Z + ret diff --git a/apps/zasm/parse.asm b/apps/zasm/parse.asm index 2511552..de32513 100644 --- a/apps/zasm/parse.asm +++ b/apps/zasm/parse.asm @@ -1,27 +1,26 @@ ; Parse string in (HL) and return its numerical value whether its a number ; literal or a symbol. Returns value in DE. +; HL is advanced to the character following the last successfully read char. ; Sets Z if number or symbol is valid, unset otherwise. parseNumberOrSymbol: - call parseLiteral - ret z - ; Not a number. - ; Is str a single char? If yes, maybe it's a special symbol. - call strIs1L - jr nz, .symbol ; nope + call isLiteralPrefix + jp z, parseLiteral + ; Not a number. try symbol ld a, (hl) cp '$' - jr z, .returnPC + jr z, .PC cp '@' - jr nz, .symbol - ; last val - ld de, (DIREC_LASTVAL) - ret -.symbol: + jr z, .lastVal + call symParse + ret nz + ; HL at end of symbol name, DE at tmp null-terminated symname. + push hl ; --> lvl 1 + ex de, hl call symFindVal ; --> DE - jr nz, .notfound - ret -.notfound: - ; If not found, check if we're in first pass. If we are, it doesn't + pop hl ; <-- lvl 1 + ret z + ; not found + ; When not found, check if we're in first pass. If we are, it doesn't ; matter that we didn't find our symbol. Return success anyhow. ; Otherwise return error. Z is already unset, so in fact, this is the ; same as jumping to zasmIsFirstPass @@ -30,9 +29,17 @@ parseNumberOrSymbol: ld de, 0 jp zasmIsFirstPass -.returnPC: - push hl - call zasmGetPC +.PC: + ex de, hl + call zasmGetPC ; --> HL ex de, hl ; result in DE - pop hl + inc hl ; char after last read + ; Z already set from cp '$' + ret + +.lastVal: + ; last val + ld de, (DIREC_LASTVAL) + inc hl ; char after last read + ; Z already set from cp '@' ret diff --git a/apps/zasm/symbol.asm b/apps/zasm/symbol.asm index 08f4312..a1091c9 100644 --- a/apps/zasm/symbol.asm +++ b/apps/zasm/symbol.asm @@ -9,6 +9,12 @@ ; first pass" whenever we encounter a new context. That is, we wipe the local ; registry, parse the code until the next global symbol (or EOF), then rewind ; and continue second pass as usual. +; +; What is a symbol name? The accepted characters for a symbol are A-Z, a-z, 0-9 +; dot (.) and underscore (_). +; This unit doesn't disallow symbols starting with a digit, but in effect, they +; aren't going to work because parseLiteral is going to get that digit first. +; So, make your symbols start with a letter or dot or underscore. ; *** Constants *** ; Size of each record in registry @@ -18,6 +24,9 @@ .equ SYM_LOC_REGSIZE ZASM_LREG_BUFSZ+1+ZASM_LREG_MAXCNT*SYM_RECSIZE +; Maximum name length for a symbol +.equ SYM_NAME_MAXLEN 0x20 + ; *** Variables *** ; A registry has three parts: record count (byte) record list and names pool. ; A record is a 3 bytes structure: @@ -34,9 +43,11 @@ ; Global labels registry .equ SYM_GLOB_REG SYM_RAMSTART -.equ SYM_LOC_REG SYM_GLOB_REG+SYM_REGSIZE -.equ SYM_CONST_REG SYM_LOC_REG+SYM_LOC_REGSIZE -.equ SYM_RAMEND SYM_CONST_REG+SYM_REGSIZE +.equ SYM_LOC_REG @+SYM_REGSIZE +.equ SYM_CONST_REG @+SYM_LOC_REGSIZE +; Area where we parse symbol names into +.equ SYM_TMPNAME @+SYM_REGSIZE +.equ SYM_RAMEND @+SYM_NAME_MAXLEN+1 ; *** Registries *** ; A symbol registry is a 5 bytes record with points to the name pool then the @@ -267,3 +278,63 @@ _symIsFull: pop hl ret +; Parse string (HL) as far as it can for a valid symbol name (see definition in +; comment at top) for a maximum of SYM_NAME_MAXLEN characters. Puts the parsed +; symbol, null-terminated, in SYM_TMPNAME. Make DE point to SYM_TMPNAME. +; HL is advanced to the character following the last successfully read char. +; Z for success. +; Error conditions: +; 1 - No character parsed. +; 2 - name too long. +symParse: + ld de, SYM_TMPNAME + push bc + ; +1 because we want to loop one extra time to see if the char is good + ; or bad. If it's bad, then fine, proceed as normal. If it's good, then + ; its going to go through djnz and we can return an error then. + ld b, SYM_NAME_MAXLEN+1 +.loop: + ld a, (hl) + ; Set it directly, even if we don't know yet if it's good + ld (de), a + or a ; end of string? + jr z, .end ; easy ending, Z set, HL set + ; Check special symbols first + cp '.' + jr z, .good + cp '_' + jr z, .good + ; lowercase + or 0x20 + cp '0' + jr c, .bad + cp '9'+1 + jr c, .good + cp 'a' + jr c, .bad + cp 'z'+1 + jr nc, .bad +.good: + ; character is valid, continue! + inc hl + inc de + djnz .loop + ; error: string too long + ; NZ is already set from cp 'z'+1 + ; HL is one char too far + dec hl + jr .end +.bad: + ; invalid char, stop where we are. + ; In all cases, we want to null-terminate that string + xor a + ld (de), a + ; HL is good. Now, did we succeed? to know, let's see where B is. + ld a, b + cp SYM_NAME_MAXLEN+1 + ; Our result is the invert of Z + call toggleZ +.end: + ld de, SYM_TMPNAME + pop bc + ret diff --git a/apps/zasm/util.asm b/apps/zasm/util.asm index 4ad85f0..b1bba10 100644 --- a/apps/zasm/util.asm +++ b/apps/zasm/util.asm @@ -30,16 +30,6 @@ toggleZ: cp a ret -; Sets Z if string at (HL) is one character long -strIs1L: - xor a - cp (hl) - jp z, unsetZ ; empty string - inc hl - cp (hl) ; Z has proper value - dec hl ; doesn't touch Z - ret - ; Compares strings pointed to by HL and DE up to A count of characters in a ; case-insensitive manner. ; If equal, Z is set. If not equal, Z is reset. diff --git a/tools/tests/unit/test_expr.asm b/tools/tests/unit/test_expr.asm index fb57049..8ef5a6a 100644 --- a/tools/tests/unit/test_expr.asm +++ b/tools/tests/unit/test_expr.asm @@ -128,9 +128,19 @@ testParseExpr: .dw 0x4080 .db "FOO+BAR*4", 0 +; "0" is a special case, let's test it +.t16: + .dw 0 + .db "0", 0 + +; Another one that caused troubles +.t17: + .dw 123 + .db "0+123", 0 + .alltests: .dw .t1, .t2, .t3, .t4, .t5, .t6, .t7, .t8, .t9, .t10, .t11, .t12 - .dw .t13, .t14, .t15, 0 + .dw .t13, .t14, .t15, .t16, .t17, 0 ; Ensure that stack is balanced on failure testSPOnFail: