#lang scheme
(require scheme/foreign)
(unsafe!)
(define xml2lib (ffi-lib "libxml2"))
(provide xml->sxml/file
xml->sxml/bytes
bytes->validation-context
validation-context?)
(define-cpointer-type _xmlDocPtr)
(define _xmlElementType
(_enum
'(XML_ELEMENT_NODE = 1
XML_ATTRIBUTE_NODE
XML_TEXT_NODE
XML_CDATA_SECTION_NODE
XML_ENTITY_REF_NODE
XML_ENTITY_NODE
XML_PI_NODE
XML_COMMENT_NODE
XML_DOCUMENT_NODE
XML_DOCUMENT_TYPE_NODE
XML_DOCUMENT_FRAG_NODE
XML_NOTATION_NODE
XML_HTML_DOCUMENT_NODE
XML_DTD_NODE
XML_ELEMENT_DECL
XML_ATTRIBUTE_DECL
XML_ENTITY_DECL
XML_NAMESPACE_DECL
XML_XINCLUDE_START
XML_XINCLUDE_END
XML_DOCB_DOCUMENT_NODE)))
(define-cstruct _xmlNs
([next _xmlNs-pointer]
[type _xmlElementType]
[href _bytes]
[prefix _bytes]
[_private _pointer]
[context _xmlDocPtr]))
(define-cstruct _xmlAttr
([_private _pointer]
[type _xmlElementType]
[name _bytes]
[children _xmlAttr-pointer/null]
[last _pointer]
[parent _pointer] [next _xmlAttr-pointer/null]
[prev _xmlAttr-pointer/null]
[doc _xmlDocPtr]
[ns _xmlNs-pointer/null]
[atype _pointer _xmlAttributeType]
[psvi _pointer]))
(define-cstruct _xmlNode
([_private _pointer]
[type _xmlElementType]
[name _bytes]
[children _xmlNode-pointer/null]
[last _xmlNode-pointer/null]
[parent _xmlNode-pointer/null]
[next _xmlNode-pointer/null]
[prev _xmlNode-pointer/null]
[doc _xmlDocPtr]
[ns _xmlNs-pointer/null]
[content _bytes]
[properties _xmlAttr-pointer/null]
[nsDef _xmlNs-pointer/null]
[psvi _pointer]
[line _ushort]
[extra _ushort]))
(define xmlParseFile (get-ffi-obj "xmlParseFile" xml2lib (_fun (path : _path)
-> (ptr-or-null : _xmlDocPtr/null)
-> (or ptr-or-null (error 'xmlParseFile "failed to parse file: ~e" path)))))
(define xmlParseDoc
(get-ffi-obj "xmlParseDoc" xml2lib (_fun (bytes : _bytes) -> (ptr-or-null : _xmlDocPtr/null)
-> (or ptr-or-null (error 'xmlParseDoc "failed to parse bytes: ~e" bytes)))))
(define xmlDocGetRootElement
(get-ffi-obj "xmlDocGetRootElement" xml2lib (_fun _xmlDocPtr
-> (ptr-or-null : _xmlNode-pointer/null)
-> (or ptr-or-null (error 'xmlDocGetRootElement "failed to get pointer to root node")))))
(define xmlFreeDoc
(get-ffi-obj "xmlFreeDoc" xml2lib (_fun _xmlDocPtr -> _void)))
(define (xmlNode->sxml nodeptr)
(match (xmlNode-type nodeptr)
['XML_ELEMENT_NODE
`(,(xml-format-name (xmlNode-name nodeptr) (xmlNode-ns nodeptr))
,@(if (xmlNode-properties nodeptr)
(list (cons '@ (xmlAttrs->sxml (xmlNode-properties nodeptr))))
null)
,@(map xmlNode->sxml (xmlNode-sibs (xmlNode-children nodeptr))))]
['XML_TEXT_NODE
(bytes-decode (xmlNode-content nodeptr))]
[other
(error 'xmlNode->sxml "expected XML_ELEMENT_NODE as type, got: " other)]))
(define (xmlNode-sibs nodeptr)
(cond [(false? nodeptr) '()]
[else
(unless (memq (xmlNode-type nodeptr) '(XML_ELEMENT_NODE XML_TEXT_NODE))
(error 'xmlNodePtr-sibs "expected XML_ELEMENT_NODE as type, got: " (xmlNode-type nodeptr)))
(cons nodeptr (xmlNode-sibs (xmlNode-next nodeptr)))]))
(define (xmlAttrs->sxml attrs)
(cond [(false? attrs) empty]
[else
(unless (eq? (xmlAttr-type attrs) 'XML_ATTRIBUTE_NODE)
(error 'xmlAttrs->sxml "expected XML_ATTRIBUTE_NODE as type, got: ~v with name ~v" (xmlAttr-type attrs) (xmlNode-name attrs)))
(cons (list (xml-format-name (xmlAttr-name attrs) (xmlAttr-ns attrs))
(bytes-decode (xmlNode-content (cast (xmlAttr-children attrs) _xmlAttr-pointer _xmlNode-pointer))))
(xmlAttrs->sxml (xmlAttr-next attrs)))]))
(define (xml-format-name name nsptr)
(let ([prefix (cond [(false? nsptr) ""]
[else (string-append (bytes-decode (xmlNs-href nsptr)) ":")])])
(string->symbol (string-append prefix (bytes-decode name)))))
(define bytes-decode bytes->string/utf-8)
(define (xml->sxml/file path #:valid [validation-ctxt #f])
(unless (file-exists? path)
(error 'xml->sxml/file "file does not exist: ~a" path))
(let* ([path-string (cond [(path? path) (path->string path)]
[(string? path) path]
[else (raise-mismatch-error 'xml->sxml/file "expected path or string as argument, got " path)])]
[docptr (xmlParseFile path-string)])
(when validation-ctxt
(doc-validate validation-ctxt docptr))
(begin0 (list '*TOP* (xmlNode->sxml (xmlDocGetRootElement docptr)))
(register-finalizer docptr xmlFreeDoc))))
(define (xml->sxml/bytes b #:valid [validation-ctxt #f])
(let* ([docptr (xmlParseDoc b)])
(when validation-ctxt
(doc-validate validation-ctxt docptr))
(begin0 (list '*TOP* (xmlNode->sxml (xmlDocGetRootElement docptr)))
(register-finalizer docptr xmlFreeDoc))))
(define (doc-validate validation-ctxt docptr)
(let ([validation-result (xmlRelaxNGValidateDoc validation-ctxt docptr)])
(unless (= validation-result 0)
(error 'doc-validate "error in validating document: ~e" validation-result))))
(define-cpointer-type _xmlRelaxNGParserCtxtPtr)
(define-cpointer-type _xmlRelaxNGPtr)
(define-cpointer-type _xmlRelaxNGValidCtxtPtr)
(define xmlRelaxNGFree
(get-ffi-obj "xmlRelaxNGFree" xml2lib (_fun _xmlRelaxNGPtr -> _void)))
(define xmlRelaxNGFreeParserCtxt
(get-ffi-obj "xmlRelaxNGFreeParserCtxt" xml2lib (_fun _xmlRelaxNGParserCtxtPtr -> _void)))
(define xmlRelaxNGFreeValidCtxt
(get-ffi-obj "xmlRelaxNGFreeValidCtxt" xml2lib (_fun _xmlRelaxNGValidCtxtPtr -> _void)))
(define xmlRelaxNGNewMemParserCtxt/ffi
(get-ffi-obj "xmlRelaxNGNewMemParserCtxt" xml2lib (_fun _bytes (size : _int) -> _xmlRelaxNGParserCtxtPtr/null)))
(define xmlRelaxNGParse
(get-ffi-obj "xmlRelaxNGParse" xml2lib (_fun _xmlRelaxNGParserCtxtPtr -> _xmlRelaxNGPtr/null)))
(define xmlRelaxNGNewValidCtxt
(get-ffi-obj "xmlRelaxNGNewValidCtxt" xml2lib (_fun _xmlRelaxNGPtr -> _xmlRelaxNGValidCtxtPtr/null)))
(define xmlRelaxNGValidateDoc
(get-ffi-obj "xmlRelaxNGValidateDoc" xml2lib
(_fun _xmlRelaxNGValidCtxtPtr _xmlDocPtr -> _int)))
(define (bytes->validation-context b)
(let* ([parser-ctxt (xmlRelaxNGNewMemParserCtxt/ffi b (bytes-length b))]
[spec (xmlRelaxNGParse parser-ctxt)]
[validation-ctxt (xmlRelaxNGNewValidCtxt spec)])
(register-finalizer validation-ctxt
(lambda (vc)
(xmlRelaxNGFree spec)
(xmlRelaxNGFreeParserCtxt parser-ctxt)
(xmlRelaxNGFreeValidCtxt vc)))
validation-ctxt))
(define (validation-context? vc)
(xmlRelaxNGValidCtxtPtr? vc))