#lang racket/base (require "ssax/sxpathlib.rkt" "ssax/util.rkt" "ssax/myenv.rkt") (provide (all-defined-out)) ; This software is in Public Domain. ; IT IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND. ; ; SXML normal form used for normalization-dependent functions: ; If attr-list is present it's always the second in SXML element. ; If aux-list is present - then list of attributes is always ; included, and aux-list is always the third. ; Minimized form is just the same, but all the empty aux-lists are ; absent, and empty attr-lists are present only in elements with aux-lists ; present. ;============================================================================== ; Auxiliary functions. ; unlike filter-map from SRFI-1 this function uses separate predicate ; and mapping functions. ; Applies proc to all the elements of source list that satisfy the predicate ; and return the list of the results. (define (filter-and-map pred proc lis) (let rpt ((l lis)) (if (null? l) '() (if (pred (car l)) (cons (proc (car l)) (rpt (cdr l))) (rpt (cdr l)))))) ; Applies pred to every member of lst and yields #t if all the results ; are #t (define (check-list pred lst) (andmap pred lst)) ; Returns attr-list node for a given obj ; or #f if it is absent (define (sxml:attr-list-node obj) (if (and (not (null? (cdr obj))) (pair? (cadr obj)) (eq? '@ (caadr obj))) (cadr obj) #f)) ; Returns attr-list wrapped in list ; or '((@)) if it is absent and aux-list is present ; or '() if both lists are absent (define (sxml:attr-as-list obj) (cond ((sxml:attr-list-node obj) => list) ((sxml:aux-list-node obj) '((@))) (else '()))) ; Returns aux-list node for a given obj ; or #f if it is absent (define (sxml:aux-list-node obj) (if (or (null? (cdr obj)) (null? (cddr obj)) (not (pair? (caddr obj))) (not (eq? (caaddr obj) '@@))) #f (caddr obj))) ; Returns aux-list wrapped in list ; or '() if it is absent (define (sxml:aux-as-list obj) (cond ((sxml:aux-list-node obj) => list) (else '()))) ; optimized (string-rindex name #\:) ; returns position of a separator between namespace-id and LocalName (define-syntax-rule (sxml:find-name-separator name len) (let rpt ((pos (-- len))) (cond ((negative? pos) #f) ((char=? #\: (string-ref name pos)) pos) (else (rpt (-- pos)))))) ;============================================================================== ; Predicates ; Predicate which returns #t if given element <obj> is empty. ; Empty element has no nested elements, text nodes, PIs, Comments or entities ; but it may contain attributes or namespace-id. ; It is a SXML counterpart of XML empty-element. (define (sxml:empty-element? obj) (not ((select-first-kid (lambda(x) (or ((ntype-names?? '(*PI* *COMMENT* *ENTITY*)) x) ((ntype?? '*) x) (string? x)))) obj))) ; Returns #t if the given <obj> is shallow-normalized SXML element. ; The element itself has to be normalised but its nested elements are not tested. (define (sxml:shallow-normalized? obj) (or (null? (cdr obj)) (and (or (and (pair? (cadr obj)) (eq? (caadr obj) '@)) (not ((select-first-kid (ntype-names?? '(@ @@))) obj))) (or (null? (cddr obj)) (and (pair? (caddr obj)) (eq? (caaddr obj) '@@)) (not ((select-first-kid (ntype?? '@@)) obj)))))) ; Returns #t if the given <obj> is normalized SXML element. ; The element itself and all its nested elements have to be normalised. (define (sxml:normalized? obj) (and (sxml:shallow-normalized? obj) (check-list (lambda(x) (if (sxml:element? x) (sxml:normalized? x) #t)) (sxml:content obj)))) ; Returns #t if the given <obj> is shallow-minimized SXML element. ; The element itself has to be minimised but its nested elements are not tested. (define (sxml:shallow-minimized? obj) (and (sxml:shallow-normalized? obj) (not (and (sxml:aux-list-node obj) (null? (sxml:aux-list obj)))) (not (and (sxml:attr-list-node obj) (null? (sxml:attr-list obj)) (not (sxml:aux-list-node obj)))))) ; Returns #t if the given <obj> is minimized SXML element. ; The element itself and all its nested elements have to be minimised. (define (sxml:minimized? obj) (and (sxml:shallow-minimized? obj) (check-list (lambda(x) (if (sxml:element? x) (sxml:minimized? x) #t)) (sxml:content obj)) )) ;============================================================================== ; Accessors ; Returns a name of a given SXML node ; It is introduced for the sake of encapsulation. (define sxml:name car) ; A version of sxml:name, which returns #f if the given <obj> is ; not a SXML element. ; Otherwise returns its name. (define (sxml:element-name obj) (and ((ntype?? '*) obj) (car obj))) ; Safe version of sxml:name, which returns #f if the given <obj> is ; not a SXML node. ; Otherwise returns its name. (define (sxml:node-name obj) (and (pair? obj) (symbol? (car obj)) (car obj))) ; Returns Local Part of Qualified Name (Namespaces in XML production [6]) ; for given obj, which is ":"-separated suffix of its Qualified Name ; If a name of a node given is NCName (Namespaces in XML production [4]), then ; it is returned as is. ; Please note that while SXML name is a symbol this function returns a string. (define (sxml:ncname obj) (let* ((name (symbol->string (car obj))) (len (string-length name))) (cond ((sxml:find-name-separator name len) => (lambda (pos) (substring name (+ pos 1) len))) (else name)))) ; Returns namespace-id part of given name, or #f if it's LocalName (define (sxml:name->ns-id sxml-name) (let* ((name (symbol->string sxml-name))) (cond ((sxml:find-name-separator name (string-length name)) => (lambda (pos) (substring name 0 pos))) (else #f)))) ; Returns the content of given SXML element or nodeset (just text and element ; nodes) representing it as a list of strings and nested elements in document ; order. This list is empty if <obj> is empty element or empty list. (define (sxml:content obj) (((if (nodeset? obj) sxml:filter select-kids) (lambda(x) (or (string? x) ; ((ntype?? '*text*) x) ((ntype?? '*) x)))) obj)) ; Returns a string which combines all the character data ; from text node childrens of the given SXML element ; or "" if there is no text node children (define (sxml:text obj) (let ((tnodes ((select-kids string?) obj))) (cond ((null? tnodes) "") ((null? (cdr tnodes)) (car tnodes)) (else (apply string-append tnodes))))) ;------------------------------------------------------------------------------ ; Normalization-dependent accessors ; ; ; "Universal" accessors are less effective but may be used for non-normalized SXML ; Safe accessors are named with suffix '-u' ; ; "Fast" accessors are optimized for normalized SXML data. ; They are not applicable to arbitrary non-normalized SXML data ; Their names has no specific suffixes ; Returns all the content of normalized SXML element except attr-list and ; aux-list. ; Thus it includes PI, COMMENT and ENTITY nodes as well as TEXT and ELEMENT nodes ; returned by sxml:content. ; Returns a list of nodes in document order or empty list if <obj> is empty ; element or empty list. ; This function is faster than sxml:content (define (sxml:content-raw obj) ((if (and (not (null? (cdr obj))) (pair? (cadr obj)) (eq? (caadr obj) '@)) (if (and (not (null? (cddr obj))) (pair? (caddr obj)) (eq? (caaddr obj) '@@)) cdddr cddr) cdr) obj)) ; Returns the list of attributes for given element or nodeset. ; Analog of ((sxpath '(@ *)) obj) ; Empty list is returned if there is no list of attributes. (define (sxml:attr-list-u obj) (cond (((select-first-kid (ntype?? '@)) obj) => cdr) (else '()))) ; Returns the list of auxiliary nodes for given element or nodeset. ; Analog of ((sxpath '(@@ *)) obj) ; Empty list is returned if a list of auxiliary nodes is absent. (define (sxml:aux-list obj) (if (or (null? (cdr obj)) (null? (cddr obj)) (not (pair? (caddr obj))) (not (eq? (caaddr obj) '@@))) '() (cdaddr obj))) ; Returns the list of auxiliary nodes for given element or nodeset. ; Analog of ((sxpath '(@@ *)) obj) ; Empty list is returned if a list of auxiliary nodes is absent. (define (sxml:aux-list-u obj) (cond (((select-first-kid (ntype?? '@@)) obj) => cdr) (else '()))) ; Return the first aux-node with <aux-name> given in SXML element <obj> ; or #f is such a node is absent. ; NOTE: it returns just the FIRST node found even if multiple nodes are ; present, so it's mostly intended for nodes with unique names (define (sxml:aux-node obj aux-name) (cond ((assq aux-name (sxml:aux-list obj))) (else #f))) ; Return a list of aux-node with <aux-name> given in SXML element <obj> ; or '() if such a node is absent. (define (sxml:aux-nodes obj aux-name) (filter (lambda(x) (eq? aux-name (car x))) (sxml:aux-list obj))) ; Accessor for an attribute <attr-name> of given SXML element <obj> which ; It returns: ; the value of the attribute if the attribute is present ; #f if there is no such an attribute in the given element (define (sxml:attr obj attr-name) (cond ((assq attr-name (sxml:attr-list obj)) => cadr) (else #f))) ; Extracts a value of attribute with given name from attr-list (define (sxml:attr-from-list attr-list name) (cond ((assq name attr-list) => cadr) (else #f))) ; Accessor for a numerical attribute <attr-name> of given SXML element <obj> ; which It returns: ; a value of the attribute as the attribute as a number if the attribute ; is present and its value may be converted to number using string->number ; #f if there is no such an attribute in the given element or ; its value can't be converted to a number (define (sxml:num-attr obj attr-name) (cond ((assq attr-name (sxml:attr-list obj)) => (lambda(x) (string->number (cadr x)))) (else #f))) ; Accessor for an attribute <attr-name> of given SXML element <obj> which ; may also be an attributes-list or nodeset (usually content of SXML element) ; ; It returns: ; the value of the attribute if the attribute is present ; #f if there is no such an attribute in the given element (define (sxml:attr-u obj attr-name) (cond ((assq attr-name ; the list of attributes is computed below (cond ((and (not (null? (cdr obj))) (pair? (cadr obj)) (eq? '@ (caadr obj))) (cdadr obj)) ; fast track for normalized elements ((eq? '@ (car obj)) (cdr obj)) ; if applied to attr-list (else (sxml:attr-list-u obj)))) => cadr) (else #f))) ; Returns the list of namespaces for given element. ; Analog of ((sxpath '(@@ *NAMESPACES* *)) obj) ; Empty list is returned if there is no list of namespaces. (define (sxml:ns-list obj) (cond ((assv '*NAMESPACES* (sxml:aux-list obj)) => cdr) (else '()))) ; Returns the list of namespace-assoc's for given namespace-id in ; SXML element <obj>. ; Analog of ((sxpath '(@@ *NAMESPACES* namespace-id)) obj) ; Empty list is returned if there is no namespace-assoc with namespace-id ; given. (define (sxml:ns-id->nodes obj namespace-id) (filter (lambda(x) (eq? (car x) namespace-id)) (sxml:ns-list obj))) ; It returns: ; A URI's for namespace-id given ; #f if there is no namespace-assoc with namespace-id given (define (sxml:ns-id->uri obj namespace-id) (cond ((assq namespace-id (sxml:ns-list obj)) => cadr) (else #f))) ; Returns a list of namespace-assocs nodes for NS URI given (define (sxml:ns-uri->nodes obj URI) (filter (lambda (ns-assoc) (string=? (cadr ns-assoc) URI)) (sxml:ns-list obj))) ; Returns a namespace-id for NS URI given (define (sxml:ns-uri->id obj URI) (let rpt ((ns-assocs (sxml:ns-list obj))) (cond ((null? ns-assocs) #f) ((string=? (cadar ns-assocs) URI) (caar ns-assocs)) (else (rpt (cdr ns-assocs))) ))) ; Returns namespace-id for given namespace-assoc list (define sxml:ns-id car) ; Returns URI for given namespace-assoc list (define sxml:ns-uri cadr) ; It returns namespace prefix for given namespace-assoc list ; Original (as in XML document) prefix for namespace-id given ; has to be strored as the third element in namespace-assoc list ; if it is different from namespace-id. ; If original prefix is omitted in namespace-assoc then ; namespace-id is used instead (define (sxml:ns-prefix ns-assoc) (if (> (length ns-assoc) 2) (caddr ns-assoc) (car ns-assoc))) ;============================================================================== ; Data modification functions ; Constructors and mutators for normalized SXML data ; ; This functions are optimized for normalized SXML data. ; They are not applicable to arbitrary non-normalized SXML data ; ; Most of the functions are provided in two variants: ; 1. side-effect intended functions for linear update of given elements. ; Their names are ended with exclamation mark. ; An example: ; sxml:change-content! ; 2. pure functions without side-effects which return modified elements. ; An example: ; sxml:change-content ; Change the content of given SXML element to <new-content> ; If <new-content> is an empty list then the <obj> is transformed ; to an empty element ; The resulting SXML element is normalized (define (sxml:change-content obj new-content) `(,(sxml:name obj) ,@(sxml:attr-as-list obj) ,@(sxml:aux-as-list obj) ,@new-content)) ; The resulting SXML element is normalized, if <new-attrlist> is empty, ; the cadr of <obj> is (@) (define (sxml:change-attrlist obj new-attrlist) `(,(sxml:name obj) ,@(cond (new-attrlist `((@ ,@new-attrlist))) ((sxml:aux-list-node obj) '((@))) (else `())) ,@(sxml:aux-as-list obj) ,@(sxml:content obj))) ; Returns SXML element with its name changed (define (sxml:change-name obj new-name) (cons new-name (cdr obj))) ; Returns SXML element <obj> with attribute <attr> added or #f ; if the attribute with given name already exists, ; <attr> is (<attr-name> <attr-value>) ; Pure functional counterpart to sxml:add-attr! (define (sxml:add-attr obj attr) (let ((attr-list (sxml:attr-list obj))) (if (assq (car attr) attr-list) #f `(,(sxml:name obj) (@ ,@(cons attr attr-list)) ,@(sxml:aux-as-list obj) ,@(sxml:content obj))))) ; Returns SXML element <obj> with changed value of attribute <attr> or #f ; if where is no attribute with given name. ; <attr> is (<attr-name> <attr-value>) (define (sxml:change-attr obj attr) (let ((attr-list (sxml:attr-list obj))) (if (null? attr-list) #f (cond ((assv (car attr) attr-list) => (lambda (y) `(,(sxml:name obj) (@ ,@(map (lambda(at) (if (eq? at y) attr at)) attr-list)) ,@(sxml:aux-as-list obj) ,@(sxml:content obj) ))) (else #f))))) ; Set attribute <attr> of element <obj> ; If there is no such attribute the new one is added (define (sxml:set-attr obj attr) (let ((attr-list (sxml:attr-list obj))) (cond ((assv (car attr) attr-list) => (lambda (y) `(,(sxml:name obj) (@ ,@(map (lambda(at) (if (eq? at y) attr at)) attr-list)) ,@(sxml:aux-as-list obj) ,@(sxml:content obj) ))) (else `(,(sxml:name obj) (@ ,@(cons attr attr-list)) ,@(sxml:aux-as-list obj) ,@(sxml:content obj)))) )) ; Returns SXML element <obj> with an auxiliary node <aux-node> added (define (sxml:add-aux obj aux-node) `(,(sxml:name obj) (@ ,@(sxml:attr-list obj)) (@@ ,@(cons aux-node (sxml:aux-list obj))) ,@(sxml:content obj))) ; Eliminates empty lists of attributes and aux-lists for given SXML element ; <obj> and its descendants ("minimize" it) ; Returns: minimized and normalized SXML element (define (sxml:squeeze obj) `(,(sxml:name obj) ,@(cond ((sxml:attr-list-node obj) => (lambda (atl) (if (and (null? (cdr atl)) (null? (sxml:aux-list obj))) '() (list atl)))) (else '())) ,@(cond ((sxml:aux-list-node obj) => (lambda (axl) (if (null? (cdr axl)) '() (list axl)))) (else '())) ,@(map (lambda(x) (cond (((ntype?? '*) x) (sxml:squeeze x)) (else x))) (sxml:content obj)))) ; Eliminates empty lists of attributes and ALL aux-lists for given SXML element ; <obj> and its descendants ; Returns: minimized and normalized SXML element (define (sxml:clean obj) `(,(sxml:name obj) ,@(cond ((sxml:attr-list-node obj) => (lambda (atl) (if (null? (cdr atl)) '() (list atl)))) (else '())) ,@(map (lambda(x) (cond (((ntype?? '*) x) (sxml:clean x)) (else x))) (sxml:content obj)))) ;============================================================================== ; SXPath-related ;------------------------------------------------------------------------------ ; Extensions ; select-first-kid:: Pred -> Node -> Node ; Given a Node, return its first child that satisfy ; the test-pred? ; Returns #f if there is no such a child ; select-first-kid:: Pred -> Nodeset -> Node ; The same as above, but select among children of all the nodes in ; the Nodeset (define (select-first-kid test-pred?) (lambda(obj) (let rpt ((lst (if (symbol? (car obj)) (cdr obj) obj))) (cond ((null? lst) #f) ((and (pair? (car lst)) (test-pred? (car lst))) (car lst)) (else (rpt (cdr lst)))) ))) ;------------------------------------------------------------------------------ ; Fast node-parent ; Returns a function of one argument - SXML element - which returns its parent ; node using *PARENT* pointer in aux-list ; '*TOP-PTR* may be used as a pointer to root node ; It return an empty list when applyed to root node (define (sxml:node-parent rootnode) (lambda(obj) (cond ((sxml:aux-node obj '*PARENT*) => (lambda(x) (if (eq? '*TOP-PTR* (cadr x)) rootnode ((cadr x))))) ((and (pair? obj) (eq? (car obj) '*TOP* )) '()) (else (error 'sxml:node-parent "PARENT pointer is absent in: ~e" obj))))) ; Lookup an element using its ID (define (sxml:lookup id index) (cond ((assoc id index) => cdr) (else #f))) ;============================================================================== ; Markup generation ;------------------------------------------------------------------------------ ; XML ; Creates the XML markup for attributes. (define (sxml:attr->xml attr) (list " " (sxml:ncname attr) "='" (cadr attr) "'")) ; Return a string or a list of strings where all the occurences of ; characters < > & " ' in a given string are replaced by corresponding ; character entity references. See also: sxml:string->html (define sxml:string->xml (make-char-quotator '((#\< . "<") (#\> . ">") (#\& . "&") (#\" . """) (#\' . "'")))) ; A version of dispatch-node specialized and optimized for SXML->XML ; transformation. (define (sxml:sxml->xml tree) (cond ((nodeset? tree) (map (lambda (a-tree) (sxml:sxml->xml a-tree)) tree)) ((pair? tree) (let* ((name (sxml:name tree)) ; NS (URI-prefixed) not supported (nm (symbol->string name)) (content (sxml:content-raw tree))) `("<" ,nm ,@(map sxml:attr->xml (sxml:attr-list tree)) ,@(if (null? content) '("/>") `(">" ,@(sxml:sxml->xml content) "</" ,nm ">"))))) ((string? tree) (sxml:string->xml tree)) ; *text* (else (error 'sxml:sxml->xml "unexpected type of node: ~e" tree)))) ;------------------------------------------------------------------------------ ; HTML ; Creates the HTML markup for attributes. (define (sxml:attr->html attr) (if (equal? "" (cadr attr)) (list " " (sxml:ncname attr)) (list " " (sxml:ncname attr) "='" (cadr attr) "'"))) ; Given a string, check to make sure it does not contain characters ; < > & " that require encoding. Return either the original ; string, or a list of string fragments with special characters ; replaced by appropriate character entities. ; Borrowed from Oleg Kiselyov's XML-to-HTML.scm (where its name is ; string->goodHTML) (define sxml:string->html (make-char-quotator '((#\< . "<") (#\> . ">") (#\& . "&") (#\" . """)))) ; This predicate yields #t for "unterminated" HTML 4.0 tags (define (sxml:non-terminated-html-tag? tag) (memq tag '(area base basefont br col frame hr img input isindex link meta param))) ; A version of dispatch-node specialized and optimized for SXML->HTML ; transformation. (define (sxml:sxml->html tree) (cond ((nodeset? tree) (map (lambda (a-tree) (sxml:sxml->html a-tree)) tree)) ((pair? tree) (let* ((name (sxml:name tree)) (nm (symbol->string name)) (content (sxml:content-raw tree))) `("<" ,nm ,@(map sxml:attr->html (sxml:attr-list tree)) ,@(if (null? content) (if (sxml:non-terminated-html-tag? name) '(">") '("/>")) `(">" ,@(sxml:sxml->html content) "</" ,nm ">"))))) ((string? tree) (sxml:string->html tree)) ; *text* (else (error 'sxml:sxml->html "unexpected type of node: ~e" tree))))