kekkonen/parse.scm

79 lines
3.0 KiB
Scheme
Raw Normal View History

2021-12-01 09:31:48 -05:00
;;; Parser module building on comparse.
2021-12-01 09:49:23 -05:00
;;; The function PARSE-LINE parses one line of text into symbols, quote-enclosed strings and numbers.
2021-12-01 09:31:48 -05:00
(module parse (lift followed-by-consuming is-not parse-whitespace skip-whitespace parse-symbol parse-number parse-string followed-by-consuming separated-by parse-symbol-or-number-or-string completely-parse parse-statement parse-line)
2021-11-30 10:22:15 -05:00
(import scheme)
(import chicken.base)
(import srfi-13)
(import srfi-14)
(import util)
(import comparse)
2021-12-01 09:49:23 -05:00
;; The set of all upper and lower case English letters.
2021-11-30 10:22:15 -05:00
(define +letter-char-set+
  ;; All 52 ASCII letters: a-z followed by A-Z. Every uppercase letter must
  ;; appear in uppercase here; a lowercase duplicate silently drops that
  ;; capital from the set and makes symbols containing it unparsable.
  (string->char-set "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"))
2021-12-01 09:49:23 -05:00
;; The set of all characters allowed in symbols.
2021-11-30 10:22:15 -05:00
(define +symbol-char-set+
  ;; Letters plus the hyphen and the decimal digits.
  (let ((hyphen-and-digits (string->char-set "-0123456789")))
    (char-set-union +letter-char-set+ hyphen-and-digits)))
2021-12-01 09:49:23 -05:00
;; This has been accepted into upstream comparse by Moritz Heidkamp but might not
;; exist locally. It lifts a function FN into the parser PARSER, creating a parser
;; that has FN applied to its result.
2021-11-30 10:22:15 -05:00
(define (lift fn parser)
  ;; Run PARSER, pass the value it produced through FN, and succeed with
  ;; whatever FN returned.
  (bind parser
        (lambda (parsed)
          (result (fn parsed)))))
2021-12-01 09:49:23 -05:00
;; The logical inverse of IS, returning the result if it is not EQV? to X.
2021-11-30 10:22:15 -05:00
(define (is-not x)
  ;; Predicate handed to SATISFIES: true for any item that is not EQV? to X.
  (define (differs-from-x? candidate)
    (not (eqv? x candidate)))
  (satisfies differs-from-x?))
2021-12-01 09:49:23 -05:00
;; Parse a string of spaces.
2021-11-30 10:22:15 -05:00
(define parse-whitespace
  ;; At least one #\space character is required.
  (let ((single-space (is #\space)))
    (one-or-more single-space)))
2021-12-01 09:49:23 -05:00
;; Skip any spaces; the input is unchanged if there are none.
2021-11-30 10:22:15 -05:00
(define skip-whitespace
  ;; Zero or more #\space characters, wrapped in SKIP.
  (let ((optional-spaces (zero-or-more (is #\space))))
    (skip optional-spaces)))
2021-12-01 09:49:23 -05:00
;; Parse a symbol.
2021-11-30 10:22:15 -05:00
(define parse-symbol
  ;; A symbol is one character from +LETTER-CHAR-SET+ followed by any number
  ;; of characters from +SYMBOL-CHAR-SET+. The matched text is downcased
  ;; before it is turned into a symbol.
  (sequence* ((first-char (in +letter-char-set+))
              (rest-chars (zero-or-more (in +symbol-char-set+))))
    (let ((name (list->string (cons first-char rest-chars))))
      (result (string->symbol (string-downcase name))))))
2021-12-01 09:49:23 -05:00
;; Parse a number.
2021-11-30 10:22:15 -05:00
(define parse-number
  ;; One or more characters from CHAR-SET:DIGIT, converted to a number via
  ;; their string form.
  (lift (lambda (digit-chars)
          (string->number (list->string digit-chars)))
        (one-or-more (in char-set:digit))))
2021-12-01 09:49:23 -05:00
;; Parse a quote-delimited string.
2021-11-30 10:22:15 -05:00
(define parse-string
  ;; A string literal: an opening double quote, any run of characters other
  ;; than a double quote, and a closing double quote. The result is the text
  ;; between the quotes; there is no escape syntax.
  ;;
  ;; ZERO-OR-MORE (rather than ONE-OR-MORE) lets the empty literal "" parse
  ;; to the empty string instead of failing the whole line.
  (lift list->string
        (enclosed-by (is #\")
                     (zero-or-more (is-not #\"))
                     (is #\"))))
2021-12-01 09:49:23 -05:00
;; This is identical to FOLLOWED-BY from comparse, except it consumes
;; the input from the SEPARATOR parser.
2021-11-30 10:22:15 -05:00
(define (followed-by-consuming parser separator)
  ;; Run PARSER, then SEPARATOR on what is left. Both consume input, but only
  ;; PARSER's value is kept as the result.
  (bind parser
        (lambda (kept)
          (bind separator
                (lambda (discarded)
                  (result kept))))))
2021-12-01 09:49:23 -05:00
;; Parse a list of PARSER separated by SEPARATOR.
2021-11-30 10:22:15 -05:00
(define (separated-by separator parser)
  ;; Each repetition first tries PARSER followed by a consumed SEPARATOR and
  ;; falls back to PARSER alone, so an element with no separator after it
  ;; (such as the last one) is still accepted.
  (let* ((element-then-separator (followed-by-consuming parser separator))
         (one-element (any-of element-then-separator parser)))
    (one-or-more one-element)))
2021-12-01 09:49:23 -05:00
;; Parse a symbol or a number or a quote-delimited string.
2021-11-30 10:22:15 -05:00
(define parse-symbol-or-number-or-string
;; The alternatives are listed as: number, symbol, quote-delimited string.
;; NOTE(review): this relies on comparse's ANY-OF trying them in the order
;; given; the three token kinds start with different characters (digit,
;; letter, double quote), so the order should not change the outcome.
(any-of parse-number parse-symbol parse-string))
2021-12-01 09:49:23 -05:00
;; Only successfully parse if PARSER fully parses the input.
2021-11-30 10:22:15 -05:00
(define (completely-parse parser)
  ;; PARSER must be followed by END-OF-INPUT for the combined parser to
  ;; succeed.
  (let ((nothing-left end-of-input))
    (followed-by parser nothing-left)))
2021-12-01 09:49:23 -05:00
;; Parse a statement, optionally preceded by whitespace, consisting of
;; symbols or numbers or quote-delimited strings.
2021-11-30 10:22:15 -05:00
(define parse-statement
  ;; Drop optional leading spaces, then parse one or more elements (symbols,
  ;; numbers or quote-delimited strings) separated by spaces. The result is
  ;; the list of parsed elements.
  ;;
  ;; SEQUENCE* runs its parsers one after another, each on the remainder left
  ;; by the previous one, so the leading spaces are consumed before the first
  ;; element is attempted.
  ;; NOTE(review): ALL-OF is deliberately not used for this. comparse appears
  ;; to apply all of ALL-OF's parsers to the same input, which would leave the
  ;; leading spaces in place and make indented statements fail -- confirm
  ;; against the installed comparse.
  (sequence* ((_ skip-whitespace)
              (elements (separated-by parse-whitespace
                                      parse-symbol-or-number-or-string)))
    (result elements)))
2021-12-01 09:49:23 -05:00
;; Parse a line of text into a list of symbols, numbers
;; and quote-delimited strings.
2021-11-30 10:22:15 -05:00
(define (parse-line line)
  ;; Build the whole-line parser first, then run it over LINE.
  (let ((whole-line-parser (completely-parse parse-statement)))
    (parse whole-line-parser line)))
) ; closes the enclosing (module parse ...) form