View source with formatted comments or as raw
    1/*  Part of SWI-Prolog
    2
    3    Author:        Jan Wielemaker & Richard O'Keefe
    4    E-mail:        J.Wielemaker@vu.nl
    5    WWW:           http://www.swi-prolog.org
    6    Copyright (c)  2004-2016, University of Amsterdam
    7                              VU University Amsterdam
    8    All rights reserved.
    9
   10    Redistribution and use in source and binary forms, with or without
   11    modification, are permitted provided that the following conditions
   12    are met:
   13
   14    1. Redistributions of source code must retain the above copyright
   15       notice, this list of conditions and the following disclaimer.
   16
   17    2. Redistributions in binary form must reproduce the above copyright
   18       notice, this list of conditions and the following disclaimer in
   19       the documentation and/or other materials provided with the
   20       distribution.
   21
   22    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   23    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   24    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
   25    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
   26    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   27    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
   28    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   29    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
   30    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   31    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
   32    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   33    POSSIBILITY OF SUCH DAMAGE.
   34*/
   35
   36:- module(sgml_write,
   37          [ html_write/2,               %          +Data, +Options
   38            html_write/3,               % +Stream, +Data, +Options
   39            sgml_write/2,               %          +Data, +Options
   40            sgml_write/3,               % +Stream, +Data, +Options
   41            xml_write/2,                %          +Data, +Options
   42            xml_write/3                 % +Stream, +Data, +Options
   43          ]).   44:- autoload(library(assoc),
   45	    [get_assoc/3,empty_assoc/1,put_assoc/4,list_to_assoc/2]).   46:- autoload(library(error),
   47	    [ must_be/2,
   48	      domain_error/2,
   49	      instantiation_error/1,
   50	      type_error/2
   51	    ]).   52:- autoload(library(gensym),[gensym/2]).   53:- autoload(library(lists),[select/3]).   54:- autoload(library(option),[option/3]).   55:- autoload(library(sgml),[dtd/2,dtd_property/2]).   56
   57:- predicate_options(xml_write/2, 2, [pass_to(xml_write/3, 3)]).   58:- predicate_options(xml_write/3, 3,
   59                     [ dtd(any),
   60                       doctype(atom),
   61                       public(atom),
   62                       system(atom),
   63                       header(boolean),
   64                       nsmap(list),
   65                       indent(nonneg),
   66                       layout(boolean),
   67                       net(boolean),
   68                       cleanns(boolean)
   69                     ]).   70
   71:- multifile
   72    xmlns/2.                        % NS, URI
   73
   74/** <module> XML/SGML writer module
   75
   76This library provides the inverse functionality   of  the sgml.pl parser
   77library, writing XML, SGML and HTML documents from the parsed output. It
   78is intended to allow rewriting in a  different dialect or encoding or to
   79perform document transformation in Prolog on the parsed representation.
   80
   81The current implementation is  particularly   keen  on getting character
   82encoding and the use of character  entities   right.  Some work has been
   83done providing layout, but space handling in   XML  and SGML make this a
   84very hazardous area.
   85
   86The Prolog-based low-level character and  escape   handling  is the real
   87bottleneck in this library and will probably be   moved  to C in a later
   88stage.
   89
   90@see    library(http/html_write) provides a high-level library for
   91        emitting HTML and XHTML.
   92*/
   93
   94%!  xml_write(+Data, +Options) is det.
   95%!  sgml_write(+Data, +Options) is det.
   96%!  html_write(+Data, +Options) is det.
   97%!  xml_write(+Stream, +Data, +Options) is det.
   98%!  sgml_write(+Stream, +Data, +Options) is det.
   99%!  html_write(+Stream, +Data, +Options) is det.
  100%
  101%   Write a term as created by the SGML/XML parser to a stream in
  102%   SGML or XML format.  Options:
  103%
  104%           * cleanns(Bool)
  105%           If `true` (default), remove duplicate `xmlns`
  106%           attributes.
  107%           * dtd(DTD)
  108%           The DTD.  This is needed for SGML documents that contain
  109%           elements with content model EMPTY.  Characters which may
  110%           not be written directly in the Stream's encoding will be
  111%           written using character data entities from the DTD if at
  112%           all possible, otherwise as numeric character references.
  113%           Note that the DTD will NOT be written out at all; as yet
  114%           there is no way to write out an internal subset,  though
  115%           it would not be hard to add one.
  116%
  117%           * doctype(DocType)
  118%           Document type for the SGML document type declaration.
  119%           If omitted it is taken from the root element.  There is
  120%           never any point in having this be disagree with the
  121%           root element.  A <!DOCTYPE> declaration will be written
  122%           if and only if at least one of doctype(_), public(_), or
  123%           system(_) is provided in Options.
  124%
  125%           * public(PubId)
  126%           The public identifier to be written in the <!DOCTYPE> line.
  127%
  128%           * system(SysId)
  129%           The system identifier to be written in the <!DOCTYPE> line.
  130%
  131%           * header(Bool)
  132%           If Bool is 'false', do not emit the <xml ...> header
  133%           line.  (xml_write/3 only)
  134%
  135%           * nsmap(Map:list(Id=URI))
  136%           When emitting embedded XML, assume these namespaces
  137%           are already defined from the environment.  (xml_write/3
  138%           only).
  139%
  140%           * indent(Indent)
  141%           Indentation of the document (for embedding)
  142%
  143%           * layout(Bool)
  144%           Emit/do not emit layout characters to make output
  145%           readable.
  146%
  147%           * net(Bool)
  148%           Use/do not use Null End Tags.
  149%           For XML, this applies only to empty elements, so you get
  150%
  151%           ==
  152%               <foo/>      (default, net(true))
  153%               <foo></foo> (net(false))
  154%           ==
  155%
  156%           For SGML, this applies to empty elements, so you get
  157%
  158%           ==
  159%               <foo>       (if foo is declared to be EMPTY in the DTD)
  160%               <foo></foo> (default, net(false))
  161%               <foo//      (net(true))
  162%           ==
  163%
  164%           and also to elements with character content not containing /
  165%
  166%           ==
  167%               <b>xxx</b>  (default, net(false))
  168%               <b/xxx/     (net(true)).
  169%           ==
  170%
  171%   Note that if the stream is UTF-8, the system will write special
  172%   characters as UTF-8 sequences, while if it is ISO Latin-1 it
  173%   will use (character) entities if there is a DTD that provides
  174%   them, otherwise it will use numeric character references.
  175
  176xml_write(Data, Options) :-
  177    current_output(Stream),
  178    xml_write(Stream, Data, Options).
  179
  180xml_write(Stream0, Data, Options) :-
  181    fix_user_stream(Stream0, Stream),
  182    (   stream_property(Stream, encoding(text))
  183    ->  set_stream(Stream, encoding(utf8)),
  184        call_cleanup(xml_write(Stream, Data, Options),
  185                     set_stream(Stream, encoding(text)))
  186    ;   new_state(xml, State),
  187        init_state(Options, State),
  188        get_state(State, nsmap, NSMap),
  189        add_missing_namespaces(Data, NSMap, Data1),
  190        emit_xml_encoding(Stream, Options),
  191        emit_doctype(Options, Data, Stream),
  192        write_initial_indent(State, Stream),
  193        emit(Data1, Stream, State)
  194    ).
  195
  196
  197sgml_write(Data, Options) :-
  198    current_output(Stream),
  199    sgml_write(Stream, Data, Options).
  200
  201sgml_write(Stream0, Data, Options) :-
  202    fix_user_stream(Stream0, Stream),
  203    (   stream_property(Stream, encoding(text))
  204    ->  set_stream(Stream, encoding(utf8)),
  205        call_cleanup(sgml_write(Stream, Data, Options),
  206                     set_stream(Stream, encoding(text)))
  207    ;   new_state(sgml, State),
  208        init_state(Options, State),
  209        write_initial_indent(State, Stream),
  210        emit_doctype(Options, Data, Stream),
  211        emit(Data, Stream, State)
  212    ).
  213
  214
  215html_write(Data, Options) :-
  216    current_output(Stream),
  217    html_write(Stream, Data, Options).
  218
  219html_write(Stream, Data, Options) :-
  220    sgml_write(Stream, Data,
  221               [ dtd(html)
  222               | Options
  223               ]).
  224
  225fix_user_stream(user, user_output) :- !.
  226fix_user_stream(Stream, Stream).
  227
  228
  229init_state([], _).
  230init_state([H|T], State) :-
  231    update_state(H, State),
  232    init_state(T, State).
  233
  234update_state(dtd(DTD), State) :-
  235    !,
  236    (   atom(DTD)
  237    ->  dtd(DTD, DTDObj)
  238    ;   DTDObj = DTD
  239    ),
  240    set_state(State, dtd, DTDObj),
  241    dtd_character_entities(DTDObj, EntityMap),
  242    set_state(State, entity_map, EntityMap).
  243update_state(nsmap(Map), State) :-
  244    !,
  245    set_state(State, nsmap, Map).
  246update_state(cleanns(Bool), State) :-
  247    !,
  248    must_be(boolean, Bool),
  249    set_state(State, cleanns, Bool).
  250update_state(indent(Indent), State) :-
  251    !,
  252    must_be(integer, Indent),
  253    set_state(State, indent, Indent).
  254update_state(layout(Bool), State) :-
  255    !,
  256    must_be(boolean, Bool),
  257    set_state(State, layout, Bool).
  258update_state(doctype(_), _) :- !.
  259update_state(public(_),  _) :- !.
  260update_state(system(_),  _) :- !.
  261update_state(net(Bool), State) :-
  262    !,
  263    must_be(boolean, Bool),
  264    set_state(State, net, Bool).
  265update_state(header(Bool), _) :-
  266    !,
  267    must_be(boolean, Bool).
  268update_state(Option, _) :-
  269    domain_error(xml_write_option, Option).
  270
  271%       emit_xml_encoding(+Stream, +Options)
  272%
  273%       Emit the XML fileheader with   encoding information. Setting the
  274%       right encoding on the output stream  must be done before calling
  275%       xml_write/3.
  276
  277emit_xml_encoding(Out, Options) :-
  278    option(header(Hdr), Options, true),
  279    Hdr == true,
  280    !,
  281    stream_property(Out, encoding(Encoding)),
  282    (   (   Encoding == utf8
  283        ;   Encoding == wchar_t
  284        )
  285    ->  format(Out, '<?xml version="1.0" encoding="UTF-8"?>~n~n', [])
  286    ;   Encoding == iso_latin_1
  287    ->  format(Out, '<?xml version="1.0" encoding="ISO-8859-1"?>~n~n', [])
  288    ;   domain_error(xml_encoding, Encoding)
  289    ).
  290emit_xml_encoding(_, _).
  291
  292
  293%!  emit_doctype(+Options, +Data, +Stream)
  294%
  295%   Emit the document-type declaration.
  296%   There is a problem with the first clause if we are emitting SGML:
  297%   the SGML DTDs for HTML up to HTML 4 *do not allow* any 'version'
  298%   attribute; so the only time this is useful is when it is illegal!
  299
  300emit_doctype(_Options, Data, Out) :-
  301    (   Data = [_|_], memberchk(element(html,Att,_), Data)
  302    ;   Data = element(html,Att,_)
  303    ),
  304    memberchk(version=Version, Att),
  305    !,
  306    format(Out, '<!DOCTYPE HTML PUBLIC "~w">~n~n', [Version]).
  307emit_doctype(Options, Data, Out) :-
  308    (   memberchk(public(PubId), Options) -> true
  309    ;   PubId = (-)
  310    ),
  311    (   memberchk(system(SysId), Options) -> true
  312    ;   SysId = (-)
  313    ),
  314    \+ (PubId == (-),
  315        SysId == (-),
  316        \+ memberchk(doctype(_), Options)
  317    ),
  318    (   Data = element(DocType,_,_)
  319    ;   Data = [_|_], memberchk(element(DocType,_,_), Data)
  320    ;   memberchk(doctype(DocType), Options)
  321    ),
  322    !,
  323    write_doctype(Out, DocType, PubId, SysId).
  324emit_doctype(_, _, _).
  325
  326write_doctype(Out, DocType, -, -) :-
  327    !,
  328    format(Out, '<!DOCTYPE ~w []>~n~n', [DocType]).
  329write_doctype(Out, DocType, -, SysId) :-
  330    !,
  331    format(Out, '<!DOCTYPE ~w SYSTEM "~w">~n~n', [DocType,SysId]).
  332write_doctype(Out, DocType, PubId, -) :-
  333    !,
  334    format(Out, '<!DOCTYPE ~w PUBLIC "~w">~n~n', [DocType,PubId]).
  335write_doctype(Out, DocType, PubId, SysId) :-
  336    format(Out, '<!DOCTYPE ~w PUBLIC "~w" "~w">~n~n', [DocType,PubId,SysId]).
  337
  338
  339%!  emit(+Element, +Out, +State, +Options)
  340%
  341%   Emit a single element
  342
  343emit(Var, _, _) :-
  344    var(Var),
  345    !,
  346    instantiation_error(Var).
  347emit([], _, _) :- !.
  348emit([H|T], Out, State) :-
  349    !,
  350    emit(H, Out, State),
  351    emit(T, Out, State).
  352emit(CDATA, Out, State) :-
  353    atomic(CDATA),
  354    !,
  355    sgml_write_content(Out, CDATA, State).
  356emit(Element, Out, State) :-
  357    \+ \+ emit_element(Element, Out, State).
  358
  359emit_element(pi(PI), Out, State) :-
  360    !,
  361    get_state(State, entity_map, EntityMap),
  362    write(Out, <?),
  363    write_quoted(Out, PI, '', EntityMap),
  364    (   get_state(State, dialect, xml) ->
  365        write(Out, ?>)
  366    ;   write(Out, >)
  367    ).
  368emit_element(element(Name, Attributes, Content), Out, State) :-
  369    !,
  370    must_be(list, Attributes),
  371    must_be(list, Content),
  372    (   get_state(State, dialect, xml)
  373    ->  update_nsmap(Attributes, CleanAttrs, State),
  374        (   get_state(State, cleanns, true)
  375        ->  WriteAttrs = CleanAttrs
  376        ;   WriteAttrs = Attributes
  377        )
  378    ;   WriteAttrs = Attributes
  379    ),
  380    att_length(WriteAttrs, State, Alen),
  381    (   Alen > 60,
  382        get_state(State, layout, true)
  383    ->  Sep = nl,
  384        AttIndent = 4
  385    ;   Sep = sp,
  386        AttIndent = 0
  387    ),
  388    put_char(Out, '<'),
  389    emit_name(Name, Out, State),
  390    (   AttIndent > 0
  391    ->  \+ \+ ( inc_indent(State, AttIndent),
  392                attributes(WriteAttrs, Sep, Out, State)
  393              )
  394    ;   attributes(WriteAttrs, Sep, Out, State)
  395    ),
  396    content(Content, Out, Name, State).
  397emit_element(E, _, _) :-
  398    type_error(xml_dom, E).
  399
  400attributes([], _, _, _).
  401attributes([H|T], Sep, Out, State) :-
  402    (   Sep == nl
  403    ->  write_indent(State, Out)
  404    ;   put_char(Out, ' ')
  405    ),
  406    attribute(H, Out, State),
  407    attributes(T, Sep, Out, State).
  408
  409attribute(Name=Value, Out, State) :-
  410    emit_name(Name, Out, State),
  411    put_char(Out, =),
  412    sgml_write_attribute(Out, Value, State).
  413
  414att_length(Atts, State, Len) :-
  415    att_length(Atts, State, 0, Len).
  416
  417att_length([], _, Len, Len).
  418att_length([A0|T], State, Len0, Len) :-
  419    alen(A0, State, AL),
  420    Len1 is Len0 + 1 + AL,
  421    att_length(T, State, Len1, Len).
  422
  423alen(ns(NS, _URI):Name=Value, _State, Len) :-
  424    !,
  425    atom_length(Value, AL),
  426    vlen(Name, NL),
  427    atom_length(NS, NsL),
  428    Len is AL+NL+NsL+3.
  429alen(URI:Name=Value, State, Len) :-
  430    !,
  431    atom_length(Value, AL),
  432    vlen(Name, NL),
  433    get_state(State, nsmap, Nsmap),
  434    (   memberchk(NS=URI, Nsmap)
  435    ->  atom_length(NS, NsL)
  436    ;   atom_length(URI, NsL)
  437    ),
  438    Len is AL+NL+NsL+3.
  439alen(Name=Value, _, Len) :-
  440    atom_length(Name, NL),
  441    vlen(Value, AL),
  442    Len is AL+NL+3.
  443
  444vlen(Value, Len) :-
  445    is_list(Value),
  446    !,
  447    vlen_list(Value, 0, Len).
  448vlen(Value, Len) :-
  449    atom_length(Value, Len).
  450
  451vlen_list([], L, L).
  452vlen_list([H|T], L0, L) :-
  453    atom_length(H, HL),
  454    (   L0 == 0
  455    ->  L1 is L0 + HL
  456    ;   L1 is L0 + HL + 1
  457    ),
  458    vlen_list(T, L1, L).
  459
  460
  461emit_name(Name, Out, _) :-
  462    atom(Name),
  463    !,
  464    write(Out, Name).
  465emit_name(ns(NS,_URI):Name, Out, _State) :-
  466    !,
  467    (  NS == ''
  468    -> write(Out, Name)
  469    ;  format(Out, '~w:~w', [NS, Name])
  470    ).
  471emit_name(URI:Name, Out, State) :-
  472    get_state(State, nsmap, NSMap),
  473    memberchk(NS=URI, NSMap),
  474    !,
  475    (   NS == []
  476    ->  write(Out, Name)
  477    ;   format(Out, '~w:~w', [NS, Name])
  478    ).
  479emit_name(Term, Out, _) :-              % error?
  480    write(Out, Term).
  481
  482%!  update_nsmap(+Attributes, -Attributes1, !State)
  483%
  484%   Modify the nsmap of State to reflect modifications due to xmlns
  485%   arguments.
  486%
  487%   @arg    Attributes1 is a copy of Attributes with all redundant
  488%           namespace attributes deleted.
  489
  490update_nsmap(Attributes, Attributes1, State) :-
  491    get_state(State, nsmap, Map0),
  492    update_nsmap(Attributes, Attributes1, Map0, Map),
  493    set_state(State, nsmap, Map).
  494
  495update_nsmap([], [], Map, Map).
  496update_nsmap([xmlns:NS=URI|T], Attrs, Map0, Map) :-
  497    !,
  498    (   memberchk(NS=URI, Map0)
  499    ->  update_nsmap(T, Attrs, Map0, Map)
  500    ;   set_nsmap(NS, URI, Map0, Map1),
  501        Attrs = [xmlns:NS=URI|Attrs1],
  502        update_nsmap(T, Attrs1, Map1, Map)
  503    ).
  504update_nsmap([xmlns=URI|T], Attrs, Map0, Map) :-
  505    !,
  506    (   memberchk([]=URI, Map0)
  507    ->  update_nsmap(T, Attrs, Map0, Map)
  508    ;   set_nsmap([], URI, Map0, Map1),
  509        Attrs = [xmlns=URI|Attrs1],
  510        update_nsmap(T, Attrs1, Map1, Map)
  511    ).
  512update_nsmap([H|T0], [H|T], Map0, Map) :-
  513    !,
  514    update_nsmap(T0, T, Map0, Map).
  515
  516set_nsmap(NS, URI, Map0, Map) :-
  517    select(NS=_, Map0, Map1),
  518    !,
  519    Map = [NS=URI|Map1].
  520set_nsmap(NS, URI, Map, [NS=URI|Map]).
  521
  522
  523%!  content(+Content, +Out, +Element, +State, +Options)
  524%
  525%   Emit the content part of a structure  as well as the termination
  526%   for the content. For empty content   we have three versions: XML
  527%   style '/>', SGML declared EMPTY element (nothing) or normal SGML
  528%   element (we must close with the same element name).
  529
  530content([], Out, Element, State) :-    % empty element
  531    !,
  532    (   get_state(State, net, true)
  533    ->  (   get_state(State, dialect, xml) ->
  534            write(Out, />)
  535        ;   empty_element(State, Element) ->
  536            write(Out, >)
  537        ;   write(Out, //)
  538        )
  539    ;/* get_state(State, net, false) */
  540        write(Out, >),
  541        (   get_state(State, dialect, sgml),
  542            empty_element(State, Element)
  543        ->  true
  544        ;   emit_close(Element, Out, State)
  545        )
  546    ).
  547content([CDATA], Out, Element, State) :-
  548    atomic(CDATA),
  549    !,
  550    (   get_state(State, dialect, sgml),
  551        get_state(State, net, true),
  552        \+ sub_atom(CDATA, _, _, _, /),
  553        write_length(CDATA, Len, []),
  554        Len < 20
  555    ->  write(Out, /),
  556        sgml_write_content(Out, CDATA, State),
  557        write(Out, /)
  558    ;   verbatim_element(Element, State)
  559    ->  write(Out, >),
  560        write(Out, CDATA),
  561        emit_close(Element, Out, State)
  562    ;/* XML or not NET */
  563            write(Out, >),
  564        sgml_write_content(Out, CDATA, State),
  565        emit_close(Element, Out, State)
  566    ).
  567content(Content, Out, Element, State) :-
  568    get_state(State, layout, true),
  569    /* If xml:space='preserve' is present, */
  570        /* we MUST NOT tamper with white space at all. */
  571        \+ (Element = element(_,Atts,_),
  572        memberchk('xml:space'=preserve, Atts)
  573    ),
  574    element_content(Content, Elements),
  575    !,
  576    format(Out, >, []),
  577    \+ \+ (
  578        inc_indent(State),
  579        write_element_content(Elements, Out, State)
  580    ),
  581    write_indent(State, Out),
  582    emit_close(Element, Out, State).
  583content(Content, Out, Element, State) :-
  584    format(Out, >, []),
  585    write_mixed_content(Content, Out, Element, State),
  586    emit_close(Element, Out, State).
  587
  588verbatim_element(Element, State) :-
  589    verbatim_element(Element),
  590    get_state(State, dtd, DTD),
  591    DTD \== (-),
  592    dtd_property(DTD, doctype(html)).
  593
  594verbatim_element(script).
  595verbatim_element(style).
  596
  597emit_close(Element, Out, State) :-
  598    write(Out, '</'),
  599    emit_name(Element, Out, State),
  600    write(Out, '>').
  601
  602
  603write_mixed_content([], _, _, _).
  604write_mixed_content([H|T], Out, Element, State) :-
  605    write_mixed_content_element(H, Out, State),
  606    write_mixed_content(T, Out, Element, State).
  607
  608write_mixed_content_element(H, Out, State) :-
  609    (   atom(H)
  610    ->  sgml_write_content(Out, H, State)
  611    ;   string(H)
  612    ->  sgml_write_content(Out, H, State)
  613    ;   functor(H, element, 3)
  614    ->  emit(H, Out, State)
  615    ;   functor(H, pi, 1)
  616    ->  emit(H, Out, State)
  617    ;   var(H)
  618    ->  instantiation_error(H)
  619    ;   H = sdata(Data)             % cannot be written without entity!
  620    ->  print_message(warning, sgml_write(sdata_as_cdata(Data))),
  621        sgml_write_content(Out, Data, State)
  622    ;   type_error(sgml_content, H)
  623    ).
  624
  625
  626element_content([], []).
  627element_content([element(Name,Atts,C)|T0], [element(Name,Atts,C)|T]) :-
  628    !,
  629    element_content(T0, T).
  630element_content([Blank|T0], T) :-
  631    atom(Blank),
  632    atom_codes(Blank, Codes),
  633    all_blanks(Codes),
  634    element_content(T0, T).
  635
  636all_blanks([]).
  637all_blanks([H|T]) :-
  638    code_type(H, space),
  639    all_blanks(T).
  640
  641write_element_content([], _, _).
  642write_element_content([H|T], Out, State) :-
  643    write_indent(State, Out),
  644    emit(H, Out, State),
  645    write_element_content(T, Out, State).
  646
  647
  648                 /*******************************
  649                 *           NAMESPACES         *
  650                 *******************************/
  651
  652%!  add_missing_namespaces(+DOM0, +NsMap, -DOM)
  653%
  654%   Add xmlns:NS=URI definitions to the toplevel element(s) to
  655%   deal with missing namespaces.
  656
  657add_missing_namespaces([], _, []) :- !.
  658add_missing_namespaces([H0|T0], Def, [H|T]) :-
  659    !,
  660    add_missing_namespaces(H0, Def, H),
  661    add_missing_namespaces(T0, Def, T).
  662add_missing_namespaces(Elem0, Def, Elem) :-
  663    Elem0 = element(Name, Atts0, Content),
  664    !,
  665    missing_namespaces(Elem0, Def, Missing),
  666    (   Missing == []
  667    ->  Elem = Elem0
  668    ;   add_missing_ns(Missing, Atts0, Atts),
  669        Elem = element(Name, Atts, Content)
  670    ).
  671add_missing_namespaces(DOM, _, DOM).    % CDATA, etc.
  672
  673add_missing_ns([], Atts, Atts).
  674add_missing_ns([H|T], Atts0, Atts) :-
  675    generate_ns(H, NS, URL),
  676    add_missing_ns(T, [xmlns:NS=URL|Atts0], Atts).
  677
  678%!  generate_ns(+URI, -NS, -URL) is det.
  679%
  680%   Generate a namespace (NS) identifier for URI.
  681
  682generate_ns(URI, NS, URI) :-
  683    xmlns(NS, URI),
  684    !.
  685generate_ns(ns(NS, URI), NS, URI) :-
  686    !.
  687generate_ns(URI, NS, URI) :-
  688    default_ns(URI, NS),
  689    !.
  690generate_ns(URI, NS, URI) :-
  691    gensym(xns, NS).
  692
  693%!  xmlns(?NS, ?URI) is nondet.
  694%
  695%   Hook to define human readable  abbreviations for XML namespaces.
  696%   xml_write/3 tries these locations:
  697%
  698%     1. This hook
  699%     2. Defaults (see below)
  700%     3. rdf_db:ns/2 for RDF-DB integration
  701%
  702%   Default XML namespaces are:
  703%
  704%     | xsi    | http://www.w3.org/2001/XMLSchema-instance |
  705%     | xs     | http://www.w3.org/2001/XMLSchema          |
  706%     | xhtml  | http://www.w3.org/1999/xhtml              |
  707%     | soap11 | http://schemas.xmlsoap.org/soap/envelope/ |
  708%     | soap12 | http://www.w3.org/2003/05/soap-envelope   |
  709%
  710%   @see xml_write/2, rdf_register_ns/2.
  711
  712:- multifile
  713    rdf_db:ns/2.  714
  715default_ns('http://www.w3.org/2001/XMLSchema-instance', xsi).
  716default_ns('http://www.w3.org/2001/XMLSchema',          xs).
  717default_ns('http://www.w3.org/1999/xhtml',              xhtml).
  718default_ns('http://schemas.xmlsoap.org/soap/envelope/', soap11).
  719default_ns('http://www.w3.org/2003/05/soap-envelope',   soap12).
  720default_ns(URI, NS) :-
  721    rdf_db:ns(NS, URI).
  722
  723%!  missing_namespaces(+DOM, +NSMap, -Missing)
  724%
  725%   Return a list of URIs appearing in DOM that are not covered
  726%   by xmlns definitions.
  727
  728missing_namespaces(DOM, Defined, Missing) :-
  729    missing_namespaces(DOM, Defined, [], Missing).
  730
  731missing_namespaces([], _, L, L) :- !.
  732missing_namespaces([H|T], Def, L0, L) :-
  733    !,
  734    missing_namespaces(H, Def, L0, L1),
  735    missing_namespaces(T, Def, L1, L).
  736missing_namespaces(element(Name, Atts, Content), Def, L0, L) :-
  737    !,
  738    update_nsmap(Atts, _, Def, Def1),
  739    missing_ns(Name, Def1, L0, L1),
  740    missing_att_ns(Atts, Def1, L1, L2),
  741    missing_namespaces(Content, Def1, L2, L).
  742missing_namespaces(_, _, L, L).
  743
  744missing_att_ns([], _, M, M).
  745missing_att_ns([Name=_|T], Def, M0, M) :-
  746    missing_ns(Name, Def, M0, M1),
  747    missing_att_ns(T, Def, M1, M).
  748
  749missing_ns(ns(NS, URI):_, Def, M0, M) :-
  750    !,
  751    (   (   memberchk(NS=URI, Def)
  752        ;   memberchk(NS=URI, M0)
  753        )
  754    -> M = M0
  755    ;  NS == ''
  756    -> M = M0
  757    ;  M = [ns(NS, URI)|M0]
  758    ).
  759missing_ns(URI:_, Def, M0, M) :-
  760    !,
  761    (   (   memberchk(_=URI, Def)
  762        ;   memberchk(URI, M0)
  763        ;   URI = xml               % predefined ones
  764        ;   URI = xmlns
  765        )
  766    ->  M = M0
  767    ;   M = [URI|M0]
  768    ).
  769missing_ns(_, _, M, M).
  770
  771                 /*******************************
  772                 *         QUOTED WRITE         *
  773                 *******************************/
  774
  775sgml_write_attribute(Out, Values, State) :-
  776    is_list(Values),
  777    !,
  778    get_state(State, entity_map, EntityMap),
  779    put_char(Out, '"'),
  780    write_quoted_list(Values, Out, '"<&\r\n\t', EntityMap),
  781    put_char(Out, '"').
  782sgml_write_attribute(Out, Value, State) :-
  783    is_text(Value),
  784    !,
  785    get_state(State, entity_map, EntityMap),
  786    put_char(Out, '"'),
  787    write_quoted(Out, Value, '"<&\r\n\t', EntityMap),
  788    put_char(Out, '"').
  789sgml_write_attribute(Out, Value, _State) :-
  790    number(Value),
  791    !,
  792    format(Out, '"~w"', [Value]).
  793sgml_write_attribute(_, Value, _) :-
  794    type_error(sgml_attribute_value, Value).
  795
  796write_quoted_list([], _, _, _).
  797write_quoted_list([H|T], Out, Escape, EntityMap) :-
  798    write_quoted(Out, H, Escape, EntityMap),
  799    (   T == []
  800    ->  true
  801    ;   put_char(Out, ' '),
  802        write_quoted_list(T, Out, Escape, EntityMap)
  803    ).
  804
  805
  806sgml_write_content(Out, Value, State) :-
  807    is_text(Value),
  808    !,
  809    get_state(State, entity_map, EntityMap),
  810    write_quoted(Out, Value, '<&>\r', EntityMap).
  811sgml_write_content(Out, Value, _) :-
  812    write(Out, Value).
  813
  814is_text(Value) :- atom(Value), !.
  815is_text(Value) :- string(Value), !.
  816
  817write_quoted(Out, Atom, Escape, EntityMap) :-
  818    atom(Atom),
  819    !,
  820    atom_codes(Atom, Codes),
  821    writeq(Codes, Out, Escape, EntityMap).
  822write_quoted(Out, String, Escape, EntityMap) :-
  823    string(String),
  824    !,
  825    string_codes(String, Codes),
  826    writeq(Codes, Out, Escape, EntityMap).
  827write_quoted(_, String, _, _) :-
  828    type_error(atom_or_string, String).
  829
  830
  831%!  writeq(+Text:codes, +Out:stream, +Escape:atom, +Escape:assoc) is det.
  832
  833writeq([], _, _, _).
  834writeq([H|T], Out, Escape, EntityMap) :-
  835    (   char_code(HC, H),
  836        sub_atom(Escape, _, _, _, HC)
  837    ->  write_entity(H, Out, EntityMap)
  838    ;   H >= 256
  839    ->  (   stream_property(Out, encoding(Enc)),
  840            unicode_encoding(Enc)
  841        ->  put_code(Out, H)
  842        ;   write_entity(H, Out, EntityMap)
  843        )
  844    ;   put_code(Out, H)
  845    ),
  846    writeq(T, Out, Escape, EntityMap).
  847
  848unicode_encoding(utf8).
  849unicode_encoding(wchar_t).
  850unicode_encoding(unicode_le).
  851unicode_encoding(unicode_be).
  852
  853write_entity(Code, Out, EntityMap) :-
  854    (   get_assoc(Code, EntityMap, EntityName)
  855    ->  format(Out, '&~w;', [EntityName])
  856    ;   format(Out, '&#x~16R;', [Code])
  857    ).
  858
  859
  860                 /*******************************
  861                 *          INDENTATION         *
  862                 *******************************/
  863
  864write_initial_indent(State, Out) :-
  865    (   get_state(State, indent, Indent),
  866        Indent > 0
  867    ->  emit_indent(Indent, Out)
  868    ;   true
  869    ).
  870
  871write_indent(State, _) :-
  872    get_state(State, layout, false),
  873    !.
  874write_indent(State, Out) :-
  875    get_state(State, indent, Indent),
  876    emit_indent(Indent, Out).
  877
  878emit_indent(Indent, Out) :-
  879    Tabs is Indent // 8,
  880    Spaces is Indent mod 8,
  881    format(Out, '~N', []),
  882    write_n(Tabs, '\t', Out),
  883    write_n(Spaces, ' ', Out).
  884
  885write_n(N, Char, Out) :-
  886    (   N > 0
  887    ->  put_char(Out, Char),
  888        N2 is N - 1,
  889        write_n(N2, Char, Out)
  890    ;   true
  891    ).
  892
  893inc_indent(State) :-
  894    inc_indent(State, 2).
  895
  896inc_indent(State, Inc) :-
  897    state(indent, Arg),
  898    arg(Arg, State, I0),
  899    I is I0 + Inc,
  900    setarg(Arg, State, I).
  901
  902
  903                 /*******************************
  904                 *         DTD HANDLING         *
  905                 *******************************/
  906
  907%!  empty_element(+State, +Element)
  908%
  909%   True if Element is declared  with   EMPTY  content in the (SGML)
  910%   DTD.
  911
  912empty_element(State, Element) :-
  913    get_state(State, dtd, DTD),
  914    DTD \== (-),
  915    dtd_property(DTD, element(Element, _, empty)).
  916
  917%!  dtd_character_entities(+DTD, -Map)
  918%
  919%   Return an assoc mapping character entities   to their name. Note
  920%   that the entity representation is a bit dubious. Entities should
  921%   allow for a wide-character version and avoid the &#..; trick.
  922
  923dtd_character_entities(DTD, Map) :-
  924    empty_assoc(Empty),
  925    dtd_property(DTD, entities(Entities)),
  926    fill_entity_map(Entities, DTD, Empty, Map).
  927
  928fill_entity_map([], _, Map, Map).
  929fill_entity_map([H|T], DTD, Map0, Map) :-
  930    (   dtd_property(DTD, entity(H, CharEntity)),
  931        atom(CharEntity),
  932        (   sub_atom(CharEntity, 0, _, _, '&#'),
  933            sub_atom(CharEntity, _, _, 0, ';')
  934        ->  sub_atom(CharEntity, 2, _, 1, Name),
  935            atom_number(Name, Code)
  936        ;   atom_length(CharEntity, 1),
  937            char_code(CharEntity, Code)
  938        )
  939    ->  put_assoc(Code, Map0, H, Map1),
  940        fill_entity_map(T, DTD, Map1, Map)
  941    ;   fill_entity_map(T, DTD, Map0, Map)
  942    ).
  943
  944
  945
  946                 /*******************************
  947                 *            FIELDS            *
  948                 *******************************/
  949
  950state(indent,     1).                   % current indentation
  951state(layout,     2).                   % use layout (true/false)
  952state(dtd,        3).                   % DTD for entity names
  953state(entity_map, 4).                   % compiled entity-map
  954state(dialect,    5).                   % xml/sgml
  955state(nsmap,      6).                   % defined namespaces
  956state(net,        7).                   % Should null end-tags be used?
  957state(cleanns,    8).                   % Remove duplicate xmlns declarations
  958
  959new_state(Dialect,
  960    state(
  961        0,              % indent
  962        true,           % layout
  963        -,              % DTD
  964        EntityMap,      % entity_map
  965        Dialect,        % dialect
  966        [],             % NS=Full map
  967        Net,            % Null End-Tags?
  968        true            % Remove duplicate xmlns declarations
  969    )) :-
  970    (   Dialect == sgml
  971    ->  Net = false,
  972        empty_assoc(EntityMap)
  973    ;   Net = true,
  974        xml_entities(EntityMap)
  975    ).
  976
  977get_state(State, Field, Value) :-
  978    state(Field, Arg),
  979    arg(Arg, State, Value).
  980
  981set_state(State, Field, Value) :-
  982    state(Field, Arg),
  983    setarg(Arg, State, Value).
  984
  985term_expansion(xml_entities(map),
  986               xml_entities(Map)) :-
  987    list_to_assoc([ 0'< - lt,
  988                    0'& - amp,
  989                    0'> - gt,
  990                    0'\' - apos,
  991                    0'\" - quot
  992                  ], Map).
  993xml_entities(map).
  994
  995                 /*******************************
  996                 *            MESSAGES          *
  997                 *******************************/
  998
  999:- multifile
 1000    prolog:message/3. 1001
 1002prolog:message(sgml_write(sdata_as_cdata(Data))) -->
 1003    [ 'SGML-write: emitting SDATA as CDATA: "~p"'-[Data] ]