(module aterm mzscheme
(require (lib "lex.ss" "parser-tools")
(lib "yacc.ss" "parser-tools")
(prefix : (lib "lex-sre.ss" "parser-tools"))
(lib "string.ss" "srfi" "13")
(lib "class.ss"))
(define (test str)
(let ([reader (make-aterm-reader (open-input-string str))])
(let loop ([result null])
(cond
[(reader) => (lambda (next)
(loop (cons next result)))]
[else (reverse result)]))))
(define-tokens regular (ID NUMBER STRING))
(define-empty-tokens operators (LBRACE RBRACE LPAREN RPAREN LBRACK RBRACK COMMA EOF))
(define-lex-abbrevs
(lower-letter (:/ "a" "z"))
(upper-letter (:/ #\A #\Z))
(id-start (:or lower-letter upper-letter "_"))
(digit (:/ "0" "9"))
(id-cont (:or id-start digit)))
(define (regexp-replace** ls str)
(if (null? ls)
str
(regexp-replace** (cdr ls) (regexp-replace* (caar ls) str (cdar ls)))))
(define (unescape str)
(regexp-replace** '((#rx"\\\\a" . "\a")
(#rx"\\\\b" . "\b")
(#rx"\\\\t" . "\t")
(#rx"\\\\n" . "\n")
(#rx"\\\\v" . "\v")
(#rx"\\\\f" . "\f")
(#rx"\\\\r" . "\r")
(#rx"\\\\\\\\" . "\\")
(#rx"\\\\" . ""))
str))
(define aterm-lexer
(lexer ((eof) (token-EOF))
(whitespace (aterm-lexer input-port))
("{" (token-LBRACE))
("}" (token-RBRACE))
("(" (token-LPAREN))
(")" (token-RPAREN))
("[" (token-LBRACK))
("]" (token-RBRACK))
("," (token-COMMA))
((:+ digit) (token-NUMBER (string->number lexeme)))
((:: id-start (:* id-cont)) (token-ID lexeme))
((:: #\" (:* (:or (:~ #\") "\\\"")) #\")
(token-STRING (substring lexeme 1 (- (string-length lexeme) 1))))))
(define (make-lexer in)
(lambda ()
(aterm-lexer in)))
(define-struct annotated (term annotation) #f)
(print-struct #t)
(define token-error-strings
'((ID . "identifier")
(STRING . "string")
(NUMBER . "number")
(LPAREN . "'('")
(RPAREN . "')'")
(LBRACK . "'['")
(RBRACK . "']'")
(LBRACE . "'{'")
(RBRACE . "'}'")
(COMMA . "','")
(EOF . "<eof>")))
(define (token-error-string token)
(cdr (assq token token-error-strings)))
(define-syntax begin1
(syntax-rules ()
[(_ e0 e1 e2 ...)
(begin e0 (begin0 e1 e2 ...))]))
(define scanner%
(class object%
(init-field in)
(define lexer (make-lexer in))
(define look-ahead #f)
(define/public (next-token)
(if look-ahead
(begin0 look-ahead (set! look-ahead #f))
(lexer)))
(define/public (peek)
(set! look-ahead (next-token))
look-ahead)
(define (parse-error expected)
(let ([actual (peek)])
(error 'parse-aterm
(format "expected ~a; found ~v"
(string-join expected ", ")
(or (and (token? actual) (token-value actual)) actual)))))
(define/public (matches? . expected)
(let ([actual (peek)])
(or (and (token? actual)
(memq (token-name actual) expected))
(memq actual expected))))
(define/public (try? . expected)
(and (send/apply this matches? expected)
(next-token)))
(define/public (expect . expected)
(unless (send/apply this matches? expected)
(parse-error (map token-error-string expected)))
(next-token))
(super-make-object)))
(define reader%
(class object%
(init in)
(define scanner (make-object scanner% in))
(define (parse-aterm-list terminator)
(if (send scanner matches? 'ID 'NUMBER 'STRING 'LPAREN 'LBRACK)
(let ([first (parse-aterm)]
[rest (cond
[(send scanner try? 'COMMA) (parse-aterm-list terminator)]
[(send scanner matches? terminator) null]
[else (send scanner expect 'COMMA terminator)])])
(cons first rest))
null))
(define (parse-tuple)
(begin1 (send scanner expect 'LPAREN)
(parse-aterm-list 'RPAREN)
(send scanner expect 'RPAREN)))
(define (parse-list)
(begin1 (send scanner expect 'LBRACK)
(cons 'list (parse-aterm-list 'RBRACK))
(send scanner expect 'RBRACK)))
(define (parse-annotation)
(begin1 (send scanner expect 'LBRACE)
(parse-aterm)
(send scanner expect 'RBRACE)))
(define (parse-aterm)
(let* ([next (send scanner peek)]
[term (cond
[(and (token? next) (eq? (token-name next) 'ID))
(send scanner next-token)
(if (eq? (send scanner peek) 'LPAREN)
(cons (string->symbol (token-value next)) (parse-tuple))
(string->symbol (token-value next)))]
[(token? next) (token-value (send scanner next-token))]
[(eq? next 'LPAREN) (parse-tuple)]
[(eq? next 'LBRACK) (parse-list)]
[else (send scanner expect 'ID 'STRING 'NUMBER 'LPAREN 'LBRACK)])])
(if (send scanner matches? 'LBRACE)
(make-annotated term (parse-annotation))
term)))
(define/public (read)
(and (not (eq? (send scanner peek) 'EOF))
(parse-aterm)))
(super-make-object)))
(define (make-aterm-reader in)
(let ([reader (make-object reader% in)])
(lambda ()
(send reader read))))
(provide make-aterm-reader unescape))