    1/*  Part of SWI-Prolog
    2
    3    Author:        Jan Wielemaker
    4    E-mail:        J.Wielemaker@vu.nl
    5    WWW:           http://www.swi-prolog.org
    6    Copyright (c)  2006-2023, University of Amsterdam
    7                              VU University Amsterdam
    8                              CWI, Amsterdam
    9                              SWI-Prolog Solutions b.v.
   10    All rights reserved.
   11
   12    Redistribution and use in source and binary forms, with or without
   13    modification, are permitted provided that the following conditions
   14    are met:
   15
   16    1. Redistributions of source code must retain the above copyright
   17       notice, this list of conditions and the following disclaimer.
   18
   19    2. Redistributions in binary form must reproduce the above copyright
   20       notice, this list of conditions and the following disclaimer in
   21       the documentation and/or other materials provided with the
   22       distribution.
   23
   24    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   25    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   26    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
   27    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
   28    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   29    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
   30    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   31    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
   32    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   33    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
   34    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   35    POSSIBILITY OF SUCH DAMAGE.
   36*/
   37
   38:- module(pldoc_wiki,
   39          [ wiki_codes_to_dom/3,        % +Codes, +Args, -DOM
   40            wiki_lines_to_dom/3,        % +Lines, +Map, -DOM
   41            section_comment_header/3,   % +Lines, -Header, -RestLines
   42            summary_from_lines/2,       % +Lines, -Codes
   43            indented_lines/3,           % +Text, +PrefixChars, -Lines
   44            strip_leading_par/2,        % +DOM0, -DOM
   45            autolink_extension/2,       % ?Extension, ?Type
   46            autolink_file/2             % +FileName, -Type
    47          ]).
    48:- use_module(library(lists)).
    49:- use_module(library(debug)).
    50:- use_module(library(error)).
    51:- use_module(library(pairs)).
    52:- use_module(library(option)).
    53:- use_module(library(debug)).
    54:- use_module(library(apply)).
    55:- use_module(library(dcg/basics)).
    56
    57:- use_module(doc_util).
    58
   59
   60/** <module> PlDoc wiki parser
   61
   62This file defines the PlDoc wiki parser,  which parses both comments and
   63wiki text files. The original version of this SWI-Prolog wiki format was
   64largely modeled after Twiki (http://twiki.org/).  The current version is
   65extended to take many aspects from   markdown, in particular the doxygen
   66refinement thereof.
   67
   68@see http://www.stack.nl/~dimitri/doxygen/manual/markdown.html
   69*/
   70
   71:- multifile
   72    prolog:doc_wiki_face//2,        % -Out, +VarNames
   73    prolog:doc_url_expansion/3,     % +Alias(Rest), -HREF, -Label
   74    prolog:url_expansion_hook/3,    % +Term, -Ref, -Label
   75    prolog:doc_autolink_extension/2.% +Extension, -Type
   76
   77
   78                 /*******************************
   79                 *          WIKI PARSING        *
   80                 *******************************/
   81
   82%!  wiki_lines_to_dom(+Lines:lines, +Args:list(atom), -Term) is det
   83%
   84%   Translate a Wiki text into  an   HTML  term suitable for html//1
   85%   from the html_write library.
   86
   87wiki_lines_to_dom(Lines, Args, HTML) :-
   88    tokenize_lines(Lines, Tokens0),
   89    normalise_indentation(Tokens0, Tokens),
   90    wiki_structure(Tokens, -1, Pars),
   91    wiki_faces(Pars, Args, HTML).
   92
   93
   94%!  wiki_codes_to_dom(+String, +Args, -DOM) is det.
   95%
   96%   Translate a plain text into a DOM term.
   97%
   98%   @param String   Plain text.  Either a string or a list of codes.
   99
  100wiki_codes_to_dom(Codes, Args, DOM) :-
  101    indented_lines(Codes, [], Lines),
  102    wiki_lines_to_dom(Lines, Args, DOM).
  103
  104
  105%!  wiki_structure(+Lines:lines, +BaseIndent,
  106%!                 -Blocks:list(block)) is det
  107%
  108%   Get the structure in terms  of block-level elements: paragraphs,
  109%   lists and tables. This processing uses   a mixture of layout and
  110%   punctuation.
  111
  112wiki_structure([], _, []) :- !.
  113wiki_structure([_-[]|T], BI, Pars) :-          % empty lines
  114    !,
  115    wiki_structure(T, BI, Pars).
  116wiki_structure(Lines, _, [\tags(Tags)]) :-
  117    tags(Lines, Tags),
  118    !.
  119wiki_structure(Lines, BI, [P1|PL]) :-
  120    take_block(Lines, BI, P1, RestLines),
  121    wiki_structure(RestLines, BI, PL).
  122
  123%!  take_block(+Lines, +BaseIndent, ?Block, -RestLines) is semidet.
  124%
  125%   Take a block-structure from the input.  Defined block elements
  126%   are lists, table, hrule, section header and paragraph.
  127
  128take_block([_-[]|Lines], BaseIndent, Block, Rest) :-
  129    !,
  130    take_block(Lines, BaseIndent, Block, Rest).
  131take_block([N-_|_], BaseIndent, _, _) :-
  132    N < BaseIndent,
  133    !,
  134    fail.                           % less indented
  135take_block(Lines, BaseIndent, List, Rest) :-
  136    list_item(Lines, Type, Indent, LI, LIT, Rest0),
  137    !,
  138    Indent > BaseIndent,
  139    rest_list(Rest0, Type, Indent, LIT, [], Rest),
  140    List0 =.. [Type, LI],
  141    (   ul_to_dl(List0, List)
  142    ->  true
  143    ;   List0 = dl(Items)
  144    ->  List = dl(class=wiki, Items)
  145    ;   List = List0
  146    ).
  147take_block([N-['|'|RL1]|LT], _, Table, Rest) :-
  148    phrase(row(R0), RL1),
  149    take_table(LT, N, R0, Table, Rest),
  150    !.
  151take_block([0-[-,-|More]|LT], _, Block, LT) :-  % separation line
  152    maplist(=(-), More),
  153    !,
  154    Block = hr([]).
  155take_block([_-Line|LT], _, Block, LT) :-        % separation line
  156    ruler(Line),
  157    !,
  158    Block = hr([]).
  159take_block([_-[@|_]], _, _, _) :-              % starts @tags section
  160    !,
  161    fail.
  162take_block(Lines, _BaseIndent, Section, RestLines) :-
  163    section_header(Lines, Section, RestLines),
  164    !.
  165take_block([_-Verb|Lines], _, Verb, Lines) :-
  166    verbatim_term(Verb),
  167    !.
  168take_block([I-L1|LT], BaseIndent, Elem, Rest) :-
  169    !,
  170    append(L1, PT, Par),
  171    rest_par(LT, PT, I, BaseIndent, MaxI, Rest),
  172    (   MaxI >= BaseIndent+16
  173    ->  Elem = center(Par)
  174    ;   phrase(blockquote(BQ), Par)
  175    ->  Elem = blockquote(BQ)
  176    ;   Elem = p(Par)
  177    ).
  178take_block([Verb|Lines], _, Verb, Lines).
  179
  180blockquote(Clean) -->
  181    [>, ' '],
  182    bq_lines(Clean).
  183
  184bq_lines([' '|Par]) -->
  185    ['\n'], !, [>,' '],
  186    bq_lines(Par).
  187bq_lines([H|T]) -->
  188    [H],
  189    bq_lines(T).
  190bq_lines([]) -->
  191    [].
  192
  193
  194%!  ruler(+Line) is semidet.
  195%
   196%   True if Line contains at least three ruler characters and otherwise only spaces.
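%
%   For example (illustration only):
%
%     ==
%     ?- ruler([-, ' ', -, ' ', -]).
%     true.
%     ==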
  197
  198ruler([C0|Line]) :-
  199    rule_char(C0),
  200    phrase(ruler(C0, 1), Line).
  201
  202ruler(C, N) --> [C], !, { N2 is N+1 }, ruler(C, N2).
  203ruler(C, N) --> [' '], !, ruler(C, N).
  204ruler(_, N) --> { N >= 3 }.
  205
  206rule_char('-').
  207rule_char('_').
  208rule_char('*').
  209
  210%!  list_item(+Lines, ?Type, ?Indent, -LI0, -LIT, -RestLines) is det.
  211%
  212%   Create a list-item. Naturally this should produce a single item,
  213%   but DL lists produce two items, so   we create the list of items
  214%   as a difference list.
  215%
  216%   @tbd    Pass base-indent
  217
  218list_item([Indent-Line|LT], Type, Indent, Items, ItemT, Rest) :-
  219    !,
  220    list_item_prefix(Type, Line, L1),
  221    (   Type == dl
  222    ->  split_dt(L1, DT0, DD1),
  223        append(DD1, LIT, DD),
  224        strip_ws_tokens(DT0, DT),
  225        Items = [dt(DT),dd(DD)|ItemT]
  226    ;   append(L1, LIT, LI0),
  227        Items = [li(LI0)|ItemT]
  228    ),
  229    rest_list_item(LT, Type, Indent, LIT, Rest).
  230
  231%!  rest_list_item(+Lines, +Type, +Indent, -RestItem, -RestLines) is det
  232%
  233%   Extract the remainder (after the first line) of a list item.
  234
  235rest_list_item(Lines, _Type, Indent, RestItem, RestLines) :-
  236    take_blocks_at_indent(Lines, Indent, Blocks, RestLines),
  237    (   Blocks = [p(Par)|MoreBlocks]
  238    ->  append(['\n'|Par], MoreBlocks, RestItem)
  239    ;   RestItem = Blocks
  240    ).
  241
  242%!  take_blocks_at_indent(+Lines, +Indent, -Pars, -RestLines) is det.
  243%
  244%   Process paragraphs and verbatim blocks (==..==) in bullet-lists.
  245
  246take_blocks_at_indent(Lines, _, [], Lines) :-
  247    skip_empty_lines(Lines, Lines1),
  248    section_header(Lines1, _, _),
  249    !.
  250take_blocks_at_indent(Lines, N, [Block|RestBlocks], RestLines) :-
  251    take_block(Lines, N, Block, Rest0),
  252    !,
  253    take_blocks_at_indent(Rest0, N, RestBlocks, RestLines).
  254take_blocks_at_indent(Lines, _, [], Lines).
  255
  256
  257%!  rest_list(+Lines, +Type, +Indent,
  258%!            -Items, -ItemTail, -RestLines) is det.
  259
  260rest_list(Lines, Type, N, Items, IT, Rest) :-
  261    skip_empty_lines(Lines, Lines1),
  262    list_item(Lines1, Type, N, Items, IT0, Rest0),
  263    !,
  264    rest_list(Rest0, Type, N, IT0, IT, Rest).
  265rest_list(Rest, _, _, IT, IT, Rest).
  266
  267%!  list_item_prefix(?Type, +Line, -Rest) is det.
  268
  269list_item_prefix(ul, [*, ' '|T], T) :- !.
  270list_item_prefix(ul, [-, ' '|T], T) :- !.
  271list_item_prefix(dl, [$, ' '|T], T) :-
  272    split_dt(T, _, _),
  273    !.
  274list_item_prefix(ol, [w(N), '.', ' '|T], T) :-
  275    atom_codes(N, [D]),
  276    between(0'0, 0'9, D).
  277
  278%!  split_dt(+LineAfterDollar, -DT, -Rest)
  279%
  280%   First see whether the entire line is the item. This allows
  281%   creating items holding : by using $ <tokens> :\n
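%
%   For example (illustration only), the tokens following =|$ |= in
%   =|$ apple pie : tasty|= split as:
%
%     ==
%     ?- split_dt([w(apple), ' ', w(pie), ' ', :, ' ', w(tasty)], DT, Rest).
%     DT = [w(apple), ' ', w(pie), ' '],
%     Rest = [w(tasty)].
%     ==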
  282
  283split_dt(In, DT, []) :-
  284    append(DT, [':'], In),
  285    !.
  286split_dt(In, DT, Rest) :-
  287    append(DT, [':'|Rest0], In),
  288    (   Rest0 == []
  289    ->  Rest = []
  290    ;   Rest0 = [' '|Rest]
  291    ),
  292    !.
  293
  294
  295%!  ul_to_dl(+UL, -DL) is semidet.
  296%
  297%   Translate an UL list into a DL list   if  all entries are of the
  298%   form "* <term> nl, <description>" and at least one <description>
  299%   is   non-empty,   or    all    items     are    of    the   form
  300%   [[PredicateIndicator]].
  301
  302ul_to_dl(ul(Items), Description) :-
  303    term_items(Items, DLItems, []),
  304    (   terms_to_predicate_includes(DLItems, Preds)
  305    ->  Description = dl(class(predicates), Preds)
  306    ;   member(dd(DD), DLItems), DD \== []
  307    ->  Description = dl(class(termlist), DLItems)
  308    ).
  309
  310term_items([], T, T).
  311term_items([LI|LIs], DLItems, Tail) :-
  312    term_item(LI, DLItems, Tail1),
  313    term_items(LIs, Tail1, Tail).
  314
  315%!  term_item(+LI, -DLItem, ?Tail) is semidet.
  316%
  317%   If LI is of the form <Term> followed  by a newline, return it as
  318%   dt-dd  tuple.  The  <dt>  item    contains  a  term
  319%
  320%       \term(Text, Term, Bindings).
  321
  322term_item(li(Tokens),
  323          [ dt(class=term, \term(Text, Term, Bindings)),
  324            dd(Descr)
  325          | Tail
  326          ], Tail) :-
  327    (   (   append(TermTokens, ['\n'|Descr], Tokens)
  328        ->  true
  329        ;   TermTokens = Tokens,
  330            Descr = []
  331        )
  332    ->  with_output_to(string(Tmp),
  333                       ( forall(member(T, TermTokens),
  334                                write_token(T)),
  335                         write(' .\n'))),
  336        E = error(_,_),
  337        catch(setup_call_cleanup(
  338                  open_string(Tmp, In),
  339                  ( read_dt_term(In, Term, Bindings),
  340                    read_dt_term(In, end_of_file, []),
  341                    atom_string(Text, Tmp)
  342                  ),
  343                  close(In)),
  344              E, fail)
  345    ).
  346
  347write_token(w(X)) :-
  348    !,
  349    write(X).
  350write_token(X) :-
  351    write(X).
  352
  353read_dt_term(In, Term, Bindings) :-
  354    read_term(In, Term,
  355              [ variable_names(Bindings),
  356                module(pldoc_modes)
  357              ]).
  358
  359terms_to_predicate_includes([], []).
  360terms_to_predicate_includes([dt(class=term, \term(_, [[PI]], [])), dd([])|T0],
  361                            [\include(PI, predicate, [])|T]) :-
  362    is_pi(PI),
  363    terms_to_predicate_includes(T0, T).
  364
  365is_pi(Name/Arity) :-
  366    atom(Name),
  367    integer(Arity),
  368    between(0, 20, Arity).
  369is_pi(Name//Arity) :-
  370    atom(Name),
  371    integer(Arity),
  372    between(0, 20, Arity).
  373
  374
  375%!  row(-Cells)// is det.
  376
  377row([C0|CL]) -->
  378    cell(C0),
  379    !,
  380    row(CL).
  381row([]) -->
  382    [].
  383
  384cell(td(C)) -->
  385    face_tokens(C0),
  386    ['|'],
  387    !,
  388    { strip_ws_tokens(C0, C)
  389    }.
  390
  391face_tokens([]) -->
  392    [].
  393face_tokens(Tokens) -->
  394    face_token(H),                          % Deal with embedded *|...|*, etc.
  395    token('|'),
  396    face_tokens(Face),
  397    token('|'),
  398    face_token(H),
  399    !,
  400    { append([[H,'|'], Face, ['|', H], Rest], Tokens) },
  401    face_tokens(Rest).
  402face_tokens([H|T]) -->
  403    token(H),
  404    face_tokens(T).
  405
  406face_token(=) --> [=].
  407face_token(*) --> [*].
  408face_token('_') --> ['_'].
  409
  410take_table(Lines, Indent, Row0, Table, Rest) :-
  411    rest_table(Lines, Indent, Rows, Rest),
  412    (   Rows = [align(Align)|Rows1]
  413    ->  maplist(align_row(Align), Rows1, Rows2),
  414        (   maplist(=(td([])), Row0)        % empty header
  415        ->  Table = table(class=wiki, Rows2)
  416        ;   maplist(td_to_th, Row0, Header),
  417            Table = table(class=wiki, [tr(Header)|Rows2])
  418        )
  419    ;   Table = table(class=wiki, [tr(Row0)|Rows])
  420    ).
  421
  422td_to_th(td(X), th(X)) :- !.
  423td_to_th(X, X).
  424
  425align_row(Align, tr(Row0), tr(Row)) :-
  426    align_cells(Align, Row0, Row).
  427
  428align_cells([Align|AT], [Cell0|T0], [Cell|T]) :-
  429    align_cell(Align, Cell0, Cell),
  430    align_cells(AT, T0, T).
  431align_cells(_, Cells, Cells).
  432
  433align_cell(Align, td(Content), td(class=Align, Content)).
  434
  435%!  rest_table(+Lines, +Indent, -Rows, -RestLines).
  436
  437rest_table([N-Line|LT], N, [align(Align)|RL], Rest) :-
  438    phrase(column_alignment(Align), Line),
  439    !,
  440    rest_table2(LT, N, RL, Rest).
  441rest_table(Lines, N, RL, Rest) :-
  442    rest_table2(Lines, N, RL, Rest).
  443
  444rest_table2([N-['|'|RL1]|LT], N, [tr(R0)|RL], Rest) :-
  445    !,
  446    phrase(row(R0), RL1),
  447    rest_table2(LT, N, RL, Rest).
  448rest_table2(Rest, _, [], Rest).
  449
  450%!  column_alignment(-Alignment) is semidet.
  451%
  452%   Process an alignment line.
  453
  454column_alignment([H|T]) -->
  455    ['|'],
  456    (   colspec(H)
  457    ->  column_alignment(T)
  458    ;   {T=[]}
  459    ).
  460
  461colspec(Align) -->
  462    ws_tokens, [':'], dashes3,
  463    (   [':']
  464    ->  {Align = center}
  465    ;   {Align = left}
  466    ),
  467    ws_tokens.
  468colspec(Align) -->
  469    ws_tokens, dashes3,
  470    (   [':']
  471    ->  {Align = right}
  472    ;   {Align = left}
  473    ),
  474    ws_tokens.
  475
  476dashes3 -->
  477    [-,-,-],
  478    dashes.
  479
  480dashes --> [-], !, dashes.
  481dashes --> [].
  482
  483ws_tokens --> [' '], !, ws_tokens.
  484ws_tokens --> [].
  485
  486%!  rest_par(+Lines, -Par,
  487%!           +BaseIndent, +MaxI0, -MaxI, -RestLines) is det.
  488%
  489%   Take the rest of a paragraph. Paragraphs   are  ended by a blank
  490%   line or the start of a list-item.   The latter is a bit dubious.
  491%   Why not a general  block-level   object?  The current definition
  492%   allows for writing lists without a blank line between the items.
  493
  494rest_par([], [], BI, MaxI0, MaxI, []) :-
  495    !,
  496    MaxI is max(BI, MaxI0).
  497rest_par([_-[]|Rest], [], _, MaxI, MaxI, Rest) :- !.
  498rest_par(Lines, [], _, MaxI, MaxI, Lines) :-
  499    Lines = [_-Verb|_],
  500    verbatim_term(Verb),
  501    !.
  502rest_par([I-L|Rest], [], _, MaxI, MaxI, [I-L|Rest]) :-
  503    list_item_prefix(_, L, _),
  504    !.
  505rest_par([I-L1|LT], ['\n'|Par], BI, MaxI0, MaxI, Rest) :-
  506    append(L1, PT, Par),
  507    MaxI1 is max(I, MaxI0),
  508    rest_par(LT, PT, BI, MaxI1, MaxI, Rest).
  509
  510
  511%!  section_header(+Lines, -Section, -RestLines) is semidet.
  512%
  513%   Get a section line from the input.
  514
  515section_header([_-L1|LT], Section, LT) :-
  516    twiki_section_line(L1, Section),
  517    !.
  518section_header([0-L1|LT], Section, LT) :-
  519    md_section_line(L1, Section),
  520    !.
  521section_header([_-L1,0-L2|LT], Section, LT) :-
  522    md_section_line(L1, L2, Section),
  523    !.
  524
  525%!  twiki_section_line(+Tokens, -Section) is semidet.
  526%
  527%   Extract a section using the Twiki   conventions. The section may
   528%   be preceded by [Word], in which case we generate an anchor name
  529%   Word for the section.
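%
%   For example (illustration only), the line
%
%     ==
%     ---+ [intro] Introduction
%     ==
%
%   is recognised as h1([class(wiki), id(intro)], [w('Introduction')]).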
  530
  531twiki_section_line([-,-,-|Rest], Section) :-
  532    plusses(Rest, Section).
  533
  534plusses([+, ' '|Rest], h1(Attrs, Content)) :-
  535    hdr_attributes(Rest, Attrs, Content).
  536plusses([+, +, ' '|Rest], h2(Attrs, Content)) :-
  537    hdr_attributes(Rest, Attrs, Content).
  538plusses([+, +, +, ' '|Rest], h3(Attrs, Content)) :-
  539    hdr_attributes(Rest, Attrs, Content).
  540plusses([+, +, +, +, ' '|Rest], h4(Attrs, Content)) :-
  541    hdr_attributes(Rest, Attrs, Content).
  542
  543hdr_attributes(List, Attrs, Content) :-
  544    strip_leading_ws(List, List2),
  545    (   List2 = ['[',w(Name),']'|List3]
  546    ->  strip_ws_tokens(List3, Content),
  547        Attrs = [class(wiki), id(Name)]
  548    ;   Attrs = class(wiki),
  549        strip_ws_tokens(List, Content)
  550    ).
  551
  552%!  md_section_line(+Tokens, -Section) is semidet.
  553%
   554%   Handle markdown section lines starting with #
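%
%   For example (illustration only), the line
%
%     ==
%     ## Getting started {#quickstart}
%     ==
%
%   is recognised as roughly
%   h2([class(wiki), id(quickstart)], [w('Getting'), ' ', w(started)]).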
  555
  556md_section_line([#, ' '|Rest], h1(Attrs, Content)) :-
  557    md_section_attributes(Rest, Attrs, Content).
  558md_section_line([#, #, ' '|Rest], h2(Attrs, Content)) :-
  559    md_section_attributes(Rest, Attrs, Content).
  560md_section_line([#, #, #, ' '|Rest], h3(Attrs, Content)) :-
  561    md_section_attributes(Rest, Attrs, Content).
  562md_section_line([#, #, #, #, ' '|Rest], h4(Attrs, Content)) :-
  563    md_section_attributes(Rest, Attrs, Content).
  564
  565md_section_attributes(List, Attrs, Content) :-
  566    phrase((tokens(Content), [' '], section_label(Label)), List),
  567    !,
  568    Attrs = [class(wiki), id(Label)].
  569md_section_attributes(Content, Attrs, Content) :-
  570    Attrs = [class(wiki)].
  571
  572section_label(Label) -->
  573    [ '{', '#', w(Name) ],
  574    label_conts(Cont), ['}'],
  575    !,
  576    { atomic_list_concat([Name|Cont], Label) }.
  577
  578label_conts([H|T]) --> label_cont(H), !, label_conts(T).
  579label_conts([]) --> [].
  580
  581label_cont(-) --> [-].
  582label_cont(Name) --> [w(Name)].
  583
  584
  585md_section_line(Line1, Line2, Header) :-
  586    Line1 \== [],
  587    section_underline(Line2, Type),
  588    is_list(Line1),
  589    phrase(wiki_words(_), Line1),  % Should not have structure elements
  590    !,
  591    (   phrase(labeled_section_line(Title, Attrs), Line1)
  592    ->  true
  593    ;   Title = Line1,
  594        Attrs = []
  595    ),
  596    Header =.. [Type, [class(wiki)|Attrs], Title].
  597
  598section_underline([=,=,=|T], h1) :-
  599    maplist(=(=), T),
  600    !.
  601section_underline([-,-,-|T], h2) :-
  602    maplist(=(-), T),
  603    !.
  604
  605labeled_section_line(Title, Attrs) -->
  606    tokens(Title), [' '], section_label(Label),
  607    !,
  608    { Attrs = [id(Label)] }.
  609
  610
  611%!  strip_ws_tokens(+Tokens, -Stripped)
  612%
  613%   Strip leading and trailing whitespace from a token list.  Note
   614%   that the whitespace is already normalised.
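%
%   For example (illustration only):
%
%     ==
%     ?- strip_ws_tokens([' ', w(a), ' '], Stripped).
%     Stripped = [w(a)].
%     ==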
  615
  616strip_ws_tokens([' '|T0], T) :-
  617    !,
  618    strip_ws_tokens(T0, T).
  619strip_ws_tokens(L0, L) :-
  620    append(L, [' '], L0),
  621    !.
  622strip_ws_tokens(L, L).
  623
  624
  625%!  strip_leading_ws(+Tokens, -Stripped) is det.
  626%
  627%   Strip leading whitespace from a token list.
  628
  629strip_leading_ws([' '|T], T) :- !.
  630strip_leading_ws(T, T).
  631
  632
  633                 /*******************************
  634                 *             TAGS             *
  635                 *******************************/
  636
  637%!  tags(+Lines:lines, -Tags) is semidet.
  638%
  639%   If the first line is a @tag, read the remainder of the lines to
  640%   a list of \tag(Name, Value) terms.
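%
%   For example (a sketch, not from the original documentation), the lines
%
%     ==
%     @throws type_error(atom, X)
%     @see    format/2
%     ==
%
%   are collected into [\tag(throws, [...]), \tag(see, [...])], where
%   each [...] is the parsed wiki content of the tag value.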
  641
  642tags(Lines, Tags) :-
  643    collect_tags(Lines, Tags0),
  644    keysort(Tags0, Tags1),
  645    pairs_values(Tags1, Tags2),
  646    combine_tags(Tags2, Tags).
  647
  648%!  collect_tags(+IndentedLines, -Tags) is semidet
  649%
  650%   Create a list Order-tag(Tag,Tokens) for   each @tag encountered.
  651%   Order is the desired position as defined by tag_order/2.
  652%
  653%   @tbd Tag content is  often  poorly   aligned.  We  now  find the
  654%   alignment of subsequent lines  and  assume   the  first  line is
   655%   aligned with the remaining lines.
  656
  657collect_tags([], []).
  658collect_tags([Indent-[@,String|L0]|Lines], [Order-tag(Tag,Value)|Tags]) :-
  659    tag_name(String, Tag, Order),
  660    !,
  661    strip_leading_ws(L0, L),
  662    rest_tag(Lines, Indent, VT, RestLines),
  663    normalise_indentation(VT, VT1),
  664    wiki_structure([0-L|VT1], -1, Value0),
  665    strip_leading_par(Value0, Value),
  666    collect_tags(RestLines, Tags).
  667
  668
  669%!  tag_name(+String, -Tag:atom, -Order:int) is semidet.
  670%
   671%   If String denotes a known tag name, Tag is the tag to use and Order its position as defined by tag_order/2.
  672
  673tag_name(w(Name), Tag, Order) :-
  674    (   renamed_tag(Name, Tag, Level),
  675        tag_order(Tag, Order)
  676    ->  print_message(Level, pldoc(deprecated_tag(Name, Tag)))
  677    ;   tag_order(Name, Order)
  678    ->  Tag = Name
  679    ;   print_message(warning, pldoc(unknown_tag(Name))),
  680        fail
  681    ).
  682
  683
  684rest_tag([], _, [], []) :- !.
  685rest_tag(Lines, Indent, [], Lines) :-
  686    Lines = [Indent-[@,Word|_]|_],
  687    tag_name(Word, _, _),
  688    !.
  689rest_tag([L|Lines0], Indent, [L|VT], Lines) :-
  690    rest_tag(Lines0, Indent, VT, Lines).
  691
  692
  693%!  renamed_tag(+DeprecatedTag:atom, -Tag:atom, -Warn) is semidet.
  694%
  695%   Declaration for deprecated tags.
  696
  697renamed_tag(exception, throws, warning).
  698renamed_tag(param,     arg,    silent).
  699
  700
  701%!  tag_order(+Tag:atom, -Order:int) is semidet.
  702%
   703%   Declares both the known tags and their expected order. Currently
  704%   the tags are forced into  this   order  without  warning. Future
  705%   versions may issue a warning if the order is inconsistent.
  706
  707:- multifile
   708    pldoc:tag_order/2.
   709
  710tag_order(Tag, Order) :-
  711    pldoc:tag_order(Tag, Order),
  712    !.
  713tag_order(arg,         100).
  714tag_order(error,       200).            % same as throw
  715tag_order(throws,      300).
  716tag_order(author,      400).
  717tag_order(version,     500).
  718tag_order(see,         600).
  719tag_order(deprecated,  700).
  720tag_order(compat,      800).            % PlDoc extension
  721tag_order(copyright,   900).
  722tag_order(license,    1000).
  723tag_order(bug,        1100).
  724tag_order(tbd,        1200).
  725tag_order(since,      1300).
  726
  727%!  combine_tags(+Tags:list(tag(Key, Value)), -Tags:list) is det.
  728%
  729%   Creates the final tag-list.  Tags is a list of
  730%
  731%           * \params(list(param(Name, Descr)))
  732%           * \tag(Name, list(Descr))
  733%
  734%   Descr is a list of tokens.
  735
  736combine_tags([], []).
  737combine_tags([tag(arg, V1)|T0], [\args([P1|PL])|Tags]) :-
  738    !,
  739    arg_tag(V1, P1),
  740    arg_tags(T0, PL, T1),
  741    combine_tags(T1, Tags).
  742combine_tags([tag(Tag,V0)|T0], [\tag(Tag, [V0|Vs])|T]) :-
  743    same_tag(Tag, T0, T1, Vs),
  744    combine_tags(T1, T).
  745
  746arg_tag([PT|Descr0], arg(PN, Descr)) :-
  747    word_of(PT, PN),
  748    strip_leading_ws(Descr0, Descr).
  749
  750word_of(w(W), W) :- !.                  % TBD: check non-word arg
  751word_of(W, W).
  752
  753arg_tags([tag(arg, V1)|T0], [P1|PL], T) :-
  754    !,
  755    arg_tag(V1, P1),
  756    arg_tags(T0, PL, T).
  757arg_tags(T, [], T).
  758
  759same_tag(Tag, [tag(Tag, V)|T0], T, [V|Vs]) :-
  760    !,
  761    same_tag(Tag, T0, T, Vs).
  762same_tag(_, L, L, []).
  763
  764
  765                 /*******************************
  766                 *             FACES            *
  767                 *******************************/
  768
  769%!  wiki_faces(+Structure, +ArgNames, -HTML) is det.
  770%
  771%   Given the wiki structure, analyse the content of the paragraphs,
  772%   list items and table cells and apply font faces and links.
  773
  774wiki_faces([dt(Class, \term(Text, Term, Bindings)), dd(Descr0)|T0],
  775           ArgNames,
  776           [dt(Class, \term(Text, Term, Bindings)), dd(Descr)|T]) :-
  777    !,
  778    varnames(Bindings, VarNames, ArgNames),
  779    wiki_faces(Descr0, VarNames, Descr),
  780    wiki_faces(T0, ArgNames, T).
  781wiki_faces(DOM0, ArgNames, DOM) :-
  782    structure_term(DOM0, Functor, Content0),
  783    !,
  784    wiki_faces_list(Content0, ArgNames, Content),
  785    structure_term(DOM, Functor, Content).
  786wiki_faces(Verb, _, Verb) :-
  787    verbatim_term(Verb),
  788    !.
  789wiki_faces(Content0, ArgNames, Content) :-
  790    assertion(is_list(Content0)),
  791    phrase(wiki_faces(Content, ArgNames), Content0),
  792    !.
  793
  794varnames([], List, List).
  795varnames([Name=_|T0], [Name|T], List) :-
  796    varnames(T0, T, List).
  797
  798wiki_faces_list([], _, []).
  799wiki_faces_list([H0|T0], Args, [H|T]) :-
  800    wiki_faces(H0, Args, H),
  801    wiki_faces_list(T0, Args, T).
  802
  803%!  structure_term(+Term, -Functor, -Content) is semidet.
  804%!  structure_term(-Term, +Functor, +Content) is det.
  805%
  806%   (Un)pack a term describing structure, so  we can process Content
  807%   and re-pack the structure.
  808
  809structure_term(\tags(Tags), tags, [Tags]) :- !.
  810structure_term(\args(Params), args, [Params]) :- !.
  811structure_term(arg(Name,Descr), arg(Name), [Descr]) :- !.
  812structure_term(\tag(Name,Value), tag(Name), [Value]) :- !.
  813structure_term(\include(What,Type,Opts), include(What,Type,Opts), []) :- !.
  814structure_term(dl(Att, Args), dl(Att), [Args]) :- !.
  815structure_term(dt(Att, Args), dt(Att), [Args]) :- !.
  816structure_term(table(Att, Args), table(Att), [Args]) :- !.
  817structure_term(td(Att, Args), td(Att), [Args]) :- !.
  818structure_term(h1(Att, Args), h1(Att), [Args]) :- !.
  819structure_term(h2(Att, Args), h2(Att), [Args]) :- !.
  820structure_term(h3(Att, Args), h3(Att), [Args]) :- !.
  821structure_term(h4(Att, Args), h4(Att), [Args]) :- !.
  822structure_term(hr(Att), hr(Att), []) :- !.
  823structure_term(p(Args), p, [Args]) :- !.
  824structure_term(Term, Functor, Args) :-
  825    structure_term_any(Term, Functor, Args).
  826
  827structure_term(Term) :-
  828    structure_term_any(Term, _Functor, _Args).
  829
  830structure_term_any(Term, Functor, Args) :-
  831    functor(Term, Functor, 1),
  832    structure_tag(Functor),
  833    !,
  834    Term =.. [Functor|Args].
  835
  836structure_tag(ul).
  837structure_tag(ol).
  838structure_tag(dl).
  839structure_tag(li).
  840structure_tag(dt).
  841structure_tag(dd).
  842structure_tag(table).
  843structure_tag(tr).
  844structure_tag(td).
  845structure_tag(th).
  846structure_tag(blockquote).
  847structure_tag(center).
  848
  849
   850%!  verbatim_term(?Term) is semidet
   851%
   852%   True if Term must be passed verbatim.
  853
  854verbatim_term(pre(_,_)).
  855verbatim_term(\term(_,_,_)).
  856
  857%!  matches(:Goal, -Input, -Last)//
  858%
  859%   True when Goal runs successfully on the DCG input and Input
  860%   is the list of matched tokens.
  861
   862:- meta_predicate matches(2, -, -, ?, ?).
   863
  864matches(Goal, Input, Last, List, Rest) :-
  865    call(Goal, List, Rest),
  866    input(List, Rest, Input, Last).
  867
  868input([H|T0], Rest, Input, Last) :-
  869    (   T0 == Rest
  870    ->  Input = [H],
  871        Last = H
  872    ;   Input = [H|T],
  873        input(T0, Rest, T, Last)
  874    ).
  875
  876
  877%!  wiki_faces(-WithFaces, +ArgNames)// is nondet.
  878%!  wiki_faces(-WithFaces, +ArgNames, +Options)// is nondet.
  879%
  880%   Apply font-changes and automatic  links   to  running  text. The
  881%   faces are applied after discovering   the structure (paragraphs,
  882%   lists, tables, keywords).
  883%
  884%   @arg Options is a dict, minimally containing `depth`
  885
  886wiki_faces(WithFaces, ArgNames, List, Rest) :-
  887    default_faces_options(Options),
  888    catch(wiki_faces(WithFaces, ArgNames, Options, List, Rest),
  889          pldoc(depth_limit),
  890          failed_faces(WithFaces, List, Rest)).
  891
  892default_faces_options(_{depth:5}).
  893
  894failed_faces(WithFaces) -->
  895    { debug(markdown(overflow), 'Depth limit exceeded', []) },
  896    wiki_words(WithFaces).
  897
  898wiki_faces([EmphTerm|T], ArgNames, Options) -->
  899    emphasis_seq(EmphTerm, ArgNames, Options),
  900    !,
  901    wiki_faces_int(T, ArgNames).
  902wiki_faces(Faces, ArgNames, Options) -->
  903    wiki_faces_int(Faces, ArgNames, Options).
  904
  905wiki_faces_int(WithFaces, ArgNames) -->
  906    { default_faces_options(Options)
  907    },
  908    wiki_faces_int(WithFaces, ArgNames, Options).
  909
  910wiki_faces_int([], _, _) -->
  911    [].
  912wiki_faces_int(List, ArgNames, Options) -->
  913    wiki_face(H, ArgNames, Options),
  914    !,
  915    {   is_list(H)
  916    ->  append(H, T, List)
  917    ;   List = [H|T]
  918    },
  919    wiki_faces(T, ArgNames, Options).
  920wiki_faces_int([Before,EmphTerm|T], ArgNames, Options) -->
  921    emphasis_before(Before),
  922    emphasis_seq(EmphTerm, ArgNames, Options),
  923    !,
  924    wiki_faces_int(T, ArgNames, Options).
  925wiki_faces_int([H|T], ArgNames, Options) -->
  926    wiki_face_simple(H, ArgNames, Options),
  927    !,
  928    wiki_faces_int(T, ArgNames, Options).
  929
  930next_level(Options0, Options) -->
  931    {   succ(NewDepth, Options0.depth)
  932    ->  Options = Options0.put(depth, NewDepth)
  933    ;   throw(pldoc(depth_limit))
  934    }.
  935
  936%!  prolog:doc_wiki_face(-Out, +VarNames)// is semidet.
  937%!  prolog:doc_wiki_face(-Out, +VarNames, +Options0)// is semidet.
  938%
  939%   Hook that can be  used  to   provide  additional  processing for
  940%   additional _inline_ wiki constructs.  The DCG list is a list of
  941%   tokens.  Defined tokens are:
  942%
  943%     - w(Atom)
  944%     Recognised word (alphanumerical)
  945%     - Atom
  946%     Single character atom representing punctuation marks or the
  947%     atom =|' '|= (space), representing white-space.
  948%
  949%   The  Out  variable  is  input  for    the  backends  defined  in
  950%   doc_latex.pl and doc_html.pl. Roughly, these   are terms similar
  951%   to what html//1 from library(http/html_write) accepts.
  952
  953wiki_face(Out, Args, _) -->
  954    prolog:doc_wiki_face(Out, Args),
  955    !.
  956wiki_face(var(Arg), ArgNames, _) -->
  957    [w(Arg)],
  958    { memberchk(Arg, ArgNames)
  959    },
  960    !.
  961wiki_face(b(Bold), ArgNames, Options) -->
  962    [*,'|'], string(Tokens), ['|',*],
  963    !,
  964    { phrase(wiki_faces(Bold, ArgNames, Options), Tokens) }.
  965wiki_face(i(Italic), ArgNames, Options) -->
  966    ['_','|'], string(Tokens), ['|','_'],
  967    !,
  968    { phrase(wiki_faces(Italic, ArgNames, Options), Tokens) }.
  969wiki_face(code(Code), _, _) -->
  970    [=], eq_code_words(Words), [=],
  971    !,
  972    { atomic_list_concat(Words, Code) }.
  973wiki_face(code(Code), _, _) -->
  974    [=,'|'], wiki_words(Code), ['|',=],
  975    !.
  976wiki_face(PredRef, _, _) -->
  977    ['`'], take_predref(PredRef), ['`'],
  978    !.
  979wiki_face(\nopredref(Pred), _, _) -->
  980    ['`', '`'], take_predref(\predref(Pred)), ['`', '`'],
  981    !.
  982wiki_face([flag, ' ', \flagref(Flag)], _, _) -->
  983    [ w('flag'), ' ', '`', w(Flag), '`' ],
  984    { current_prolog_flag(Flag, _) },
  985    !.
  986wiki_face(code(Code), _, _) -->
  987    ['`','`'], wiki_words(Code), ['`','`'],
  988    !.
  989wiki_face(Code, _, _) -->
  990    (   ['`'], code_words(Words), ['`']
  991    ->  { atomic_list_concat(Words, Text),
  992          E = error(_,_),
  993          catch(atom_to_term(Text, Term, Vars), E, fail),
  994          !,
  995          code_face(Text, Term, Vars, Code)
  996        }
  997    ).
  998wiki_face(Face, _, Options) -->
  999    [ w(Name) ], arg_list(List),
 1000    { atomic_list_concat([Name|List], Text),
 1001      E = error(_,_),
 1002      catch(atom_to_term(Text, Term, Vars), E, fail),
 1003      term_face(Text, Term, Vars, Face, Options)
 1004    },
 1005    !.
 1006wiki_face(br([]), _, _) -->
 1007    [<,w(br),>,'\n'], !.
 1008wiki_face(br([]), _, _) -->
 1009    [<,w(br),/,>,'\n'], !.
 1010        % Below this, we only do links.
 1011wiki_face(_, _, Options) -->
 1012    { Options.get(link) == false,
 1013      !,
 1014      fail
 1015    }.
 1016wiki_face(PredRef, _, _) -->
 1017    take_predref(PredRef),
 1018    !.
 1019wiki_face(\cite(Citations), _, _) -->
 1020    ['['], citations(Citations), [']'].
 1021wiki_face(\include(Name, Type, Options), _, _) -->
 1022    ['[','['], file_name(Base, Ext), [']',']'],
 1023    { autolink_extension(Ext, Type),
 1024      !,
 1025      file_name_extension(Base, Ext, Name),
 1026      resolve_file(Name, Options, [])
 1027    },
 1028    !.
 1029wiki_face(\include(Name, Type, [caption(Caption)|Options]), _, _) -->
 1030    (   ['!','['], tokens(100, Caption), [']','(']
 1031    ->  file_name(Base, Ext), [')'],
 1032        { autolink_extension(Ext, Type),
 1033          !,
 1034          file_name_extension(Base, Ext, Name),
 1035          resolve_file(Name, Options, [])
 1036        }
 1037    ),
 1038    !.
 1039wiki_face(Link, ArgNames, Options) -->          % TWiki: [[Label][Link]]
 1040    (   ['[','['], wiki_label(Label, ArgNames, Options), [']','[']
 1041    ->  wiki_link(Link, [label(Label), relative(true), end(']')]),
 1042        [']',']'], !
 1043    ).
 1044wiki_face(Link, ArgNames, Options) -->          % Markdown: [Label](Link)
 1045    (   ['['], wiki_label(Label, ArgNames, Options), [']','(']
 1046    ->  wiki_link(Link, [label(Label), relative(true), end(')')]),
 1047        [')'], !
 1048    ).
 1049wiki_face(Link, _ArgNames, _) -->
 1050    wiki_link(Link, []),
 1051    !.
 1052
 1053wiki_label(Label, _ArgNames, _Options) -->
 1054    image_label(Label).
 1055wiki_label(Label, ArgNames, Options) -->
 1056    next_level(Options, NOptions),
 1057    limit(40, wiki_faces(Label, ArgNames, NOptions.put(link,false))).
 1058
 1059%!  wiki_face_simple(-Out, +ArgNames, +Options)
 1060%
 1061%   Skip simple (non-markup) wiki.
 1062
 1063wiki_face_simple(Word, _, _) -->
 1064    [ w(Word) ],
 1065    !.
 1066wiki_face_simple(SpaceOrPunct, _, _) -->
 1067    [ SpaceOrPunct ],
 1068    { atomic(SpaceOrPunct) },
 1069    !.
 1070wiki_face_simple(FT, ArgNames, _) -->
 1071    [Structure],
 1072    { wiki_faces(Structure, ArgNames, FT)
 1073    }.
 1074
 1075wiki_words([]) --> [].
 1076wiki_words([Word|T]) --> [w(Word)], !, wiki_words(T).
 1077wiki_words([Punct|T]) --> [Punct], {atomic(Punct)}, wiki_words(T).
 1078
 1079%!  code_words(-Words)//
 1080%
 1081%   True when Words is the  content   as  it  appears in =|`code`|=,
 1082%   where =|``|= is mapped to =|`|=.
 1083
 1084code_words([]) --> [].
 1085code_words([Word|T]) --> [w(Word)], code_words(T).
 1086code_words(CodeL) --> ['`','`'], {CodeL = ['`'|T]}, code_words(T).
 1087code_words([Punct|T]) --> [Punct], {atomic(Punct)}, code_words(T).
 1088
 1089%!  eq_code_words(-Words)//
 1090%
 1091%   Stuff that can be between single `=`.  This is limited to
 1092%
 1093%           - Start and end must be a word
 1094%           - In between may be the following punctuation chars:
 1095%             =|.-:/|=, notably dealing with file names and
 1096%             identifiers in various external languages.
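%
%   For example (illustration only):
%
%     ==
%     ?- phrase(eq_code_words(Words), [w(foo), '.', w(pl)]).
%     Words = [foo, '.', pl].
%     ==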
 1097
 1098eq_code_words([Word]) -->
 1099    [ w(Word) ].
 1100eq_code_words([Word|T]) -->
 1101    [ w(Word) ], eq_code_internals(T, [End]), [w(End)].
 1102
 1103eq_code_internals(T, T) --> [].
 1104eq_code_internals([H|T], Tail) -->
 1105    eq_code_internal(H),
 1106    eq_code_internals(T, Tail).
 1107
 1108eq_code_internal(Word) -->
 1109    [w(Word)].
 1110eq_code_internal(Punct) -->
 1111    [Punct],
 1112    { eq_code_internal_punct(Punct) }.
 1113
 1114eq_code_internal_punct('.').
 1115eq_code_internal_punct('-').
 1116eq_code_internal_punct(':').
 1117eq_code_internal_punct('/').
 1118
 1119
 1120%!  code_face(+Text, +Term, +Vars, -Code) is det.
 1121%
 1122%   Deal with =|`... code ...`|=  sequences.   Text  is  the matched
 1123%   text, Term is the parsed Prolog term   and Code is the resulting
 1124%   intermediate code.
 1125
 1126code_face(Text, Var, _, Code) :-
 1127    var(Var),
 1128    !,
 1129    Code = var(Text).
 1130code_face(Text, _, _, code(Text)).
 1131
 1132
 1133%!  emphasis_seq(-Out, +ArgNames, +Options) is semidet.
 1134%
 1135%   Recognise emphasis sequences
 1136
 1137emphasis_seq(EmphTerm, ArgNames, Options) -->
 1138    emphasis_start(C),
 1139    next_level(Options, NOptions),
 1140    matches(limit(100, wiki_faces(Emph, ArgNames, NOptions)), Input, Last),
 1141    emphasis_end(C),
 1142    { emph_markdown(Last, Input),
 1143      emphasis_term(C, Emph, EmphTerm)
 1144    },
 1145    !.
 1146
 1147
 1148%!  emphasis_term(+Emphasis, +Tokens, -Term) is det.
 1149%!  emphasis_before(-Before)// is semidet.
 1150%!  emphasis_start(-Emphasis)// is semidet.
 1151%!  emphasis_end(+Emphasis)// is semidet.
 1152%
 1153%   Primitives for Doxygen emphasis handling.
 1154
 1155emphasis_term('_',   Term, i(Term)).
 1156emphasis_term('*',   Term, b(Term)).
 1157emphasis_term('__',  Term, strong(Term)).
 1158emphasis_term('**',  Term, strong(Term)).
 1159
 1160emph_markdown(_, [w(_)]) :- !.
 1161emph_markdown(Last, Tokens) :-
 1162    \+ emphasis_after_sep(Last),
 1163    E = error(_,_),
 1164    catch(b_getval(pldoc_object, Obj), E, Obj = '??'),
 1165    debug(markdown(emphasis), '~q: additionally emphasis: ~p',
 1166          [Obj, Tokens]).
 1167
 1168emphasis_before(Before) -->
 1169    [Before],
 1170    { emphasis_start_sep(Before) }.
 1171
 1172emphasis_start_sep('\n').
 1173emphasis_start_sep(' ').
 1174emphasis_start_sep('<').
 1175emphasis_start_sep('{').
 1176emphasis_start_sep('(').
 1177emphasis_start_sep('[').
 1178emphasis_start_sep(',').
 1179emphasis_start_sep(':').
 1180emphasis_start_sep(';').
 1181
 1182emphasis_start(Which), [w(Word)] -->
 1183    emphasis(Which),
 1184    [w(Word)].
 1185
 1186emphasis(**)   --> [*, *].
 1187emphasis(*)    --> [*].
 1188emphasis('__') --> ['_', '_'].
 1189emphasis('_')  --> ['_'].
 1190
 1191emphasis_end(Which), [After] -->
 1192    emphasis(Which),
 1193    [ After ],
 1194    !,
 1195    { emphasis_close_sep(After) -> true }.
 1196emphasis_end(Which) -->
 1197    emphasis(Which).
 1198
 1199% these characters should not be before a closing * or _.
 1200
 1201emphasis_after_sep('\n').
 1202emphasis_after_sep(' ').
 1203emphasis_after_sep('(').
 1204emphasis_after_sep('[').
 1205emphasis_after_sep('<').
 1206emphasis_after_sep('=').
 1207emphasis_after_sep('+').
 1208emphasis_after_sep('\\').
 1209emphasis_after_sep('@').
 1210
 1211emphasis_close_sep('\n').                       % white
 1212emphasis_close_sep(' ').                        % white
 1213emphasis_close_sep(',').                        % sentence punctuation
 1214emphasis_close_sep('.').
 1215emphasis_close_sep('!').
 1216emphasis_close_sep('?').
 1217emphasis_close_sep(':').
 1218emphasis_close_sep(';').
 1219emphasis_close_sep(']').                        % [**label**](link)
 1220emphasis_close_sep(')').                        % ... _italic_)
 1221emphasis_close_sep('}').                        % ... _italic_}
 1222emphasis_close_sep(Token) :-
 1223    structure_term(Token).
 1224
 1225
 1226%!  arg_list(-Atoms) is nondet.
 1227%
 1228%   Atoms  is  a  token-list  for  a    Prolog   argument  list.  An
 1229%   argument-list is a sequence of tokens '(' ... ')'.
 1230%
 1231%   @bug    the current implementation does not deal correctly with
 1232%           brackets that are embedded in quoted strings.
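%
%   For example (illustration only):
%
%     ==
%     ?- phrase(arg_list(Args), ['(', w(a), ',', w(b), ')']).
%     Args = ['(', a, ',', b, ')'].
%     ==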
 1233
 1234arg_list(['('|T]) -->
 1235    ['('], arg_list_close(T, 1).
 1236
 1237arg_list_close(Tokens, Depth) -->
 1238    [')'],
 1239    !,
 1240    (   { Depth == 1 }
 1241    ->  { Tokens = [')'] }
 1242    ;   { Depth > 1 }
 1243    ->  { Tokens = [')'|More],
 1244          NewDepth is Depth - 1
 1245        },
 1246        arg_list_close(More, NewDepth)
 1247    ).
 1248arg_list_close(['('|T], Depth) -->
 1249    ['('], { NewDepth is Depth+1 },
 1250    arg_list_close(T, NewDepth).
 1251arg_list_close([H|T], Depth) -->
 1252    [w(H)],
 1253    !,
 1254    arg_list_close(T, Depth).
 1255arg_list_close([H|T], Depth) -->
 1256    [H],
 1257    arg_list_close(T, Depth).
 1258
 1259
 1260%!  term_face(+Text, +Term, +Vars, -Face, +Options) is semidet.
 1261%
 1262%   Process embedded Prolog-terms. Currently   processes  Alias(Arg)
 1263%   terms that refer to files.  Future   versions  will also provide
 1264%   pretty-printing of Prolog terms.
 1265
 1266term_face(_Text, Term, _Vars, \file(Name, FileOptions), Options) :-
 1267    ground(Term),
 1268    compound(Term),
 1269    compound_name_arity(Term, Alias, 1),
 1270    user:file_search_path(Alias, _),
 1271    existing_file(Term, FileOptions, [], Options),
 1272    !,
 1273    format(atom(Name), '~q', [Term]).
 1274term_face(Text, Term, Vars, Face, _Options) :-
 1275    code_face(Text, Term, Vars, Face).
 1276
 1277untag([], []).
 1278untag([w(W)|T0], [W|T]) :-
 1279    !,
 1280    untag(T0, T).
 1281untag([H|T0], [H|T]) :-
 1282    untag(T0, T).
 1283
 1284%!  image_label(-Label)//
 1285%
 1286%   Match File[;param=value[,param=value]*]
 1287
 1288image_label(\include(Name, image, Options)) -->
 1289    file_name(Base, Ext),
 1290    { autolink_extension(Ext, image),
 1291      file_name_extension(Base, Ext, Name),
 1292      resolve_file(Name, Options, RestOptions)
 1293    },
 1294    file_options(RestOptions).
 1295
 1296
 1297take_predref(\predref(Name/Arity)) -->
 1298    [ w(Name), '/' ], arity(Arity),
 1299    { functor_name(Name)
 1300    }.
 1301take_predref(\predref(Module:(Name/Arity))) -->
 1302    [ w(Module), ':', w(Name), '/' ], arity(Arity),
 1303    { functor_name(Name)
 1304    }.
 1305take_predref(\predref(Name/Arity)) -->
 1306    prolog_symbol_char(S0),
 1307    symbol_string(SRest), [ '/' ], arity(Arity),
 1308    !,
 1309    { atom_chars(Name, [S0|SRest])
 1310    }.
 1311take_predref(\predref(Name//Arity)) -->
 1312    [ w(Name), '/', '/' ], arity(Arity),
 1313    { functor_name(Name)
 1314    }.
 1315take_predref(\predref(Module:(Name//Arity))) -->
 1316    [ w(Module), ':', w(Name), '/', '/' ], arity(Arity),
 1317    { functor_name(Name)
 1318    }.
 1319
 1320%!  file_options(-Options) is det.
 1321%
 1322%   Extracts additional processing options for  files. The format is
  1323%   ;name="value",name2="value2",... Values must be quoted; spaces are not allowed.
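%
%   For example (illustration only; the option name =|width|= is an
%   arbitrary choice):
%
%     ==
%     ?- phrase(file_options(Opts), [';', w(width), =, '"', w('400'), '"']).
%     Opts = [width(400)].
%     ==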
 1324
 1325file_options(Options) -->
 1326    [;], nv_pairs(Options),
 1327    !.
 1328file_options([]) -->
 1329    [].
 1330
 1331nv_pairs([H|T]) -->
 1332    nv_pair(H),
 1333    (   [',']
 1334    ->  nv_pairs(T)
 1335    ;   {T=[]}
 1336    ).
 1337
 1338nv_pair(Option) -->
 1339    [ w(Name), =,'"'], tokens(Tokens), ['"'],
 1340    !,
 1341    { untag(Tokens, Atoms),
 1342      atomic_list_concat(Atoms, Value0),
 1343      (   atom_number(Value0, Value)
 1344      ->  true
 1345      ;   Value = Value0
 1346      ),
 1347      Option =.. [Name,Value]
 1348    }.
 1349
 1350
 1351%!  wiki_link(-Link, +Options)// is semidet.
 1352%
 1353%   True if we can find a link to a file or URL. Links are described
 1354%   as one of:
 1355%
 1356%       $ filename :
 1357%       A filename defined using autolink_file/2 or
 1358%       autolink_extension/2
 1359%       $ <url-protocol>://<rest-url> :
 1360%       A fully qualified URL
 1361%       $ '<' URL '>' :
 1362%       Be more relaxed on the URL specification.
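%
%   For example (illustration only, using an arbitrary URL), a bare URL
%   in the token stream becomes an anchor:
%
%     ==
%     ?- phrase(wiki_link(Link, []), [w(https), :, /, /, w(example), '.', w(org)]).
%     Link = a(href('https://example.org'), 'https://example.org').
%     ==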
 1363
 1364:- multifile
  1365    user:url_path/2.
  1366
 1367wiki_link(\file(Name, FileOptions), Options) -->
 1368    file_name(Base, Ext),
 1369    { file_name_extension(Base, Ext, Name),
 1370      (   autolink_file(Name, _)
 1371      ;   autolink_extension(Ext, _)
 1372      ),
 1373      !,
 1374      resolve_file(Name, FileOptions, Options)
 1375    }.
 1376wiki_link(\file(Name, FileOptions), Options) -->
 1377    [w(Name)],
 1378    { autolink_file(Name, _),
 1379      !,
 1380      resolve_file(Name, FileOptions, Options)
 1381    },
 1382    !.
 1383wiki_link(a(href(Ref), Label), Options) -->
 1384    [ w(Prot),:,/,/], { url_protocol(Prot) },
 1385    { option(end(End), Options, space)
 1386    },
 1387    tokens_no_whitespace(Rest), peek_end_url(End),
 1388    !,
 1389    { atomic_list_concat([Prot, :,/,/ | Rest], Ref),
 1390      option(label(Label), Options, Ref)
 1391    }.
 1392wiki_link(a(href(Ref), Label), _Options) -->
 1393    [<, w(Alias), :],
 1394    tokens_no_whitespace(Rest), [>],
 1395    { Term = (Alias:Rest),
 1396      prolog:url_expansion_hook(Term, Ref, Label), !
 1397    }.
 1398wiki_link(a(href(Ref), Label), Options) -->
 1399    [<, w(Alias), :],
 1400    { user:url_path(Alias, _)
 1401    },
 1402    tokens_no_whitespace(Rest), [>],
 1403    { atomic_list_concat(Rest, Local),
 1404      (   Local == ''
 1405      ->  Term =.. [Alias,'.']
 1406      ;   Term =.. [Alias,Local]
 1407      ),
 1408      E = error(_,_),
 1409      catch(expand_url_path(Term, Ref), E, fail),
 1410      option(label(Label), Options, Ref)
 1411    }.
 1412wiki_link(a(href(Ref), Label), Options) -->
 1413    [#, w(First)],
 1414    { option(end(End), Options) },
 1415    tokens_no_whitespace(Rest),
 1416    peek_end_url(End),
 1417    !,
 1418    { atomic_list_concat([#,First|Rest], Ref),
 1419      option(label(Label), Options, Ref)
 1420    }.
 1421wiki_link(a(href(Ref), Label), Options) -->
 1422    [<],
 1423    (   { option(relative(true), Options),
 1424          Parts = Rest
 1425        }
 1426    ->  tokens_no_whitespace(Rest)
 1427    ;   { Parts = [Prot, : | Rest]
 1428        },
 1429        [w(Prot), :], tokens_no_whitespace(Rest)
 1430    ),
 1431    [>],
 1432    !,
 1433    { atomic_list_concat(Parts, Ref),
 1434      option(label(Label), Options, Ref)
 1435    }.
 1436
 1437%!  prolog:url_expansion_hook(+Term, -HREF, -Label) is semidet.
 1438%
 1439%   This hook is called after   recognising  =|<Alias:Rest>|=, where
 1440%   Term is of the form Alias(Rest). If   it  succeeds, it must bind
 1441%   HREF to an atom or string representing the link target and Label
 1442%   to an html//1 expression for the label.
 1443
 1444%!  file_name(-Name:atom, -Ext:atom)// is semidet.
 1445%
 1446%   Matches a filename.  A filename is defined as a sequence
  1447%   <segment>{/<segment>}.<ext>.
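%
%   For example (illustration only):
%
%     ==
%     ?- phrase(file_name(Base, Ext), [w(doc), /, w(wiki), '.', w(txt)]).
%     Base = 'doc/wiki',
%     Ext = txt.
%     ==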
 1448
 1449file_name(FileBase, Extension) -->
 1450    segment(S1),
 1451    segments(List),
 1452    ['.'], file_extension(Extension),
 1453    !,
 1454    { atomic_list_concat([S1|List], '/', FileBase) }.
 1455file_name(FileBase, Extension) -->
 1456    [w(Alias), '('],
 1457    { once(user:file_search_path(Alias, _)) },
 1458    segment(S1),
 1459    segments(List),
 1460    [')'],
 1461    !,
 1462    { atomic_list_concat([S1|List], '/', Base),
 1463      Spec =.. [Alias,Base],
 1464      absolute_file_name(Spec, Path,
 1465                         [ access(read),
 1466                           extensions([pl]),
 1467                           file_type(prolog),
 1468                           file_errors(fail)
 1469                         ]),
 1470      file_name_extension(FileBase, Extension, Path)
 1471    }.
 1472
 1473
 1474segment(..) -->
 1475    ['.','.'],
 1476    !.
 1477segment(Word) -->
 1478    [w(Word)].
 1479segment(Dir) -->
 1480    [w(Word),'.',w(d)],
 1481    { atom_concat(Word, '.d', Dir) }.
 1482
 1483segments([H|T]) -->
 1484    ['/'],
 1485    !,
 1486    segment(H),
 1487    segments(T).
 1488segments([]) -->
 1489    [].
 1490
 1491file_extension(Ext) -->
 1492    [w(Ext)],
 1493    { autolink_extension(Ext, _)
 1494    }.
 1495
 1496
  1497%!  resolve_file(+Name, -FileOptions, ?RestOptions) is det.
 1498%
 1499%   Find the actual file based on the pldoc_file global variable. If
 1500%   present  and  the   file   is    resolvable,   add   an   option
 1501%   absolute_path(Path) that reflects the current   location  of the
 1502%   file.
 1503
 1504resolve_file(Name, FileOptions, Rest) :-
 1505    existing_file(Name, FileOptions, Rest, []),
 1506    !.
 1507resolve_file(_, Options, Options).
 1508
 1509
 1510existing_file(Name, FileOptions, Rest, Options) :-
 1511    \+ Options.get(link) == false,
 1512    E = error(_,_),
 1513    catch(existing_file_p(Name, FileOptions, Rest), E, fail).
 1514
 1515existing_file_p(Name, FileOptions, Rest) :-
 1516    (   nb_current(pldoc_file, RelativeTo),
 1517        RelativeTo \== []
 1518    ->  Extra = [relative_to(RelativeTo)|Extra1]
 1519    ;   Extra = Extra1
 1520    ),
 1521    (   compound(Name)
 1522    ->  Extra1 = [file_type(prolog)]
 1523    ;   Extra1 = []
 1524    ),
 1525    absolute_file_name(Name, Path,
 1526                       [ access(read),
 1527                         file_errors(fail)
 1528                       | Extra
 1529                       ]),
 1530    FileOptions = [ absolute_path(Path) | Rest ].
 1531
 1532%!  arity(-Arity:int)// is semidet.
 1533%
 1534%   True if the next token can be  interpreted as an arity. That is,
  1535%   refers to a non-negative integer of at most 20. Although Prolog
 1536%   allows for higher arities, we assume 20   is  a fair maximum for
 1537%   user-created predicates that are documented.
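%
%   For example (illustration only):
%
%     ==
%     ?- phrase(arity(A), [w('3')]).
%     A = 3.
%     ==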
 1538
 1539arity(Arity) -->
 1540    [ w(Word) ],
 1541    { E = error(_,_),
 1542      catch(atom_number(Word, Arity), E, fail),
 1543      Arity >= 0, Arity < 20
 1544    }.
 1545
 1546%!  symbol_string(-String)// is nondet
 1547%
 1548%   Accept a sequence of Prolog symbol characters, starting with the
 1549%   shortest (empty) match.
 1550
 1551symbol_string([]) -->
 1552    [].
 1553symbol_string([H|T]) -->
 1554    [H],
 1555    { prolog_symbol_char(H) },
 1556    symbol_string(T).
 1557
 1558prolog_symbol_char(C) -->
 1559    [C],
 1560    { prolog_symbol_char(C) }.
 1561
 1562%!  prolog_symbol_char(?Char)
 1563%
 1564%   True if char is classified by Prolog as a symbol char.
 1565
 1566prolog_symbol_char(#).
 1567prolog_symbol_char($).
 1568prolog_symbol_char(&).
 1569prolog_symbol_char(*).
 1570prolog_symbol_char(+).
 1571prolog_symbol_char(-).
 1572prolog_symbol_char(.).
 1573prolog_symbol_char(/).
 1574prolog_symbol_char(:).
 1575prolog_symbol_char(<).
 1576prolog_symbol_char(=).
 1577prolog_symbol_char(>).
 1578prolog_symbol_char(?).
 1579prolog_symbol_char(@).
 1580prolog_symbol_char(\).
 1581prolog_symbol_char(^).
 1582prolog_symbol_char(~).
 1583
 1584
 1585functor_name(String) :-
 1586    sub_atom(String, 0, 1, _, Char),
 1587    char_type(Char, lower).
 1588
 1589url_protocol(http).
 1590url_protocol(https).
 1591url_protocol(ftp).
 1592url_protocol(mailto).
 1593
 1594peek_end_url(space) -->
 1595    peek(Punct, End),
 1596    { punct_token(Punct),
 1597      space_token(End)
 1598    },
 1599    !.
 1600peek_end_url(space) -->
 1601    peek(End),
 1602    { space_token(End) },
 1603    !.
 1604peek_end_url(space, [], []) :- !.
 1605peek_end_url(Token) -->
 1606    peek(Token),
 1607    !.
 1608
 1609punct_token('.').
 1610punct_token('!').
 1611punct_token('?').
 1612punct_token(',').
 1613punct_token(';').
 1614
 1615space_token(' ') :- !.
 1616space_token('\r') :- !.
 1617space_token('\n') :- !.
 1618space_token(T) :-
 1619    \+ atom(T),                     % high level format like p(...)
 1620    \+ T = w(_).
 1621
 1622%!  autolink_extension(?Ext, ?Type) is nondet.
 1623%
  1624%   True if Ext is a filename extension that creates automatic links
 1625%   in the documentation.
 1626
 1627autolink_extension(Ext, Type) :-
 1628    prolog:doc_autolink_extension(Ext, Type),
 1629    !.
 1630autolink_extension(Ext, prolog) :-
 1631    user:prolog_file_type(Ext,prolog),
 1632    !.
 1633autolink_extension(txt, wiki).
 1634autolink_extension(md,  wiki).
 1635autolink_extension(gif, image).
 1636autolink_extension(png, image).
 1637autolink_extension(jpg, image).
 1638autolink_extension(jpeg, image).
 1639autolink_extension(svg, image).
 1640
 1641%!  autolink_file(?File, -Type) is nondet.
 1642%
 1643%   Files to which we automatically create links, regardless of the
 1644%   extension.
 1645
 1646autolink_file('README', wiki).
 1647autolink_file('TODO', wiki).
 1648autolink_file('ChangeLog', wiki).
 1649
 1650%!  citations(-List)//
 1651%
 1652%   Parse @cite1[;@cite2]* into a list of citations.
 1653
 1654citations([H|T]) -->
 1655    citation(H),
 1656    (   [';']
 1657    ->  citations(T)
 1658    ;   {T=[]}
 1659    ).
 1660
 1661citation(Atom) -->
 1662    [@], wiki_words(Atoms),
 1663    { length(Atoms, Len),
 1664      Len > 10, !,
 1665      fail
 1666    ; true
 1667    },
 1668    end_citation,
 1669    !,
 1670    { atomic_list_concat(Atoms, Atom)
 1671    }.
 1672
 1673end_citation, [';'] --> [';'].
 1674end_citation, ['@'] --> ['@'].
 1675end_citation, [']'] --> [']'].
 1676
 1677
 1678                 /*******************************
 1679                 *           SECTIONS           *
 1680                 *******************************/
 1681
 1682%!  section_comment_header(+Lines, -Header, -RestLines) is semidet.
 1683%
 1684%   Processes   /**   <section>   comments.   Header   is   a   term
 1685%   \section(Type, Title), where  Title  is   an  atom  holding  the
 1686%   section title and Type is an atom holding the text between <>.
 1687%
 1688%   @param Lines    List of Indent-Codes.
 1689%   @param Header   DOM term of the format \section(Type, Title),
 1690%                   where Type is an atom from <type> and Title is
  1691%                   an atom holding the title.
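%
%   For example (illustration only, with an arbitrary title), the first
%   line of a =|/** <module> ...|= comment is parsed as:
%
%     ==
%     ?- section_comment_header([0-`<module> Demo title`], Header, Rest).
%     Header = \section(module, 'Demo title'),
%     Rest = [].
%     ==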
 1692
 1693section_comment_header([_-Line|Lines], Header, Lines) :-
 1694    phrase(section_line(Header), Line).
 1695
 1696section_line(\section(Type, Title)) -->
 1697    ws, "<", word(Codes), ">", normalise_white_space(TitleCodes),
 1698    { atom_codes(Type, Codes),
 1699      atom_codes(Title, TitleCodes)
 1700    }.
 1701
 1702                 /*******************************
 1703                 *           TOKENIZER          *
 1704                 *******************************/
 1705
 1706%!  tokenize_lines(+Lines:lines, -TokenLines) is det
 1707%
 1708%   Convert Indent-Codes into Indent-Tokens
 1709
 1710tokenize_lines(Lines, TokenLines) :-
 1711    tokenize_lines(Lines, -1, TokenLines).
 1712
 1713tokenize_lines([], _, []) :- !.
 1714tokenize_lines(Lines, Indent, [Pre|T]) :-
 1715    verbatim(Lines, Indent, Pre, RestLines),
 1716    !,
 1717    tokenize_lines(RestLines, Indent, T).
 1718tokenize_lines([I-H0|T0], Indent0, [I-H|T]) :-
 1719    phrase(line_tokens(H), H0),
 1720    (   H == []
 1721    ->  Indent = Indent0
 1722    ;   Indent = I
 1723    ),
 1724    tokenize_lines(T0, Indent, T).
 1725
 1726
 1727%!  line_tokens(-Tokens:list)// is det.
 1728%
 1729%   Create a list of tokens, where each token is either a ' ' to
 1730%   denote white space, a term w(Word) denoting a word, or an atom
 1731%   denoting a punctuation character.  Underscores (_) appearing
 1732%   inside an alphanumerical string are considered part of the word.
 1733%   E.g., "hello_world_" tokenizes into [w(hello_world), '_'].
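     %
     %   An illustrative query (backquotes denote a code list):
     %
     %     ==
     %     ?- phrase(line_tokens(Tokens), `Hello, world!`).
     %     Tokens = [w('Hello'), ',', ' ', w(world), '!'].
     %     ==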
 1734
 1735line_tokens([H|T]) -->
 1736    line_token(H),
 1737    !,
 1738    line_tokens(T).
 1739line_tokens([]) -->
 1740    [].
 1741
 1742line_token(T) -->
 1743    [C],
 1744    (   { code_type(C, space) }
 1745    ->  ws,
 1746        { T = ' ' }
 1747    ;   { code_type(C, alnum) },
 1748        word(Rest),
 1749        { atom_codes(W, [C|Rest]),
 1750          T = w(W)
 1751        }
 1752    ;   { char_code(T, C) }
 1753    ).
 1754
 1755word([C0|T]) -->
 1756    [C0],  { code_type(C0, alnum) },
 1757    !,
 1758    word(T).
 1759word([0'_, C1|T]) -->
 1760    [0'_, C1],  { code_type(C1, alnum) },
 1761    !,
 1762    word(T).
 1763word([]) -->
 1764    [].
 1765
 1766alphas([C0|T]) -->
 1767    [C0],  { code_type(C0, alpha) },
 1768    !,
 1769    alphas(T).
 1770alphas([]) -->
 1771    [].
 1772
 1773%!  verbatim(+Lines, +EnvIndent, -Pre, -RestLines) is det.
 1774%
 1775%   Extract a verbatim environment.  The  returned   Pre  is  of the
 1776%   format pre(Attributes, String). The indentation   of the leading
 1777%   fence is subtracted from the indentation of the verbatim lines.
 1778%   Three fence types are supported: the traditional =|==|=, the
 1779%   Doxygen =|~~~|= and the markdown =|```|= (minimum 3 characters),
 1780%   optionally followed by =|{.ext}|= to indicate the language.
 1781%
 1782%   A verbatim environment is extracted using
 1783%
 1784%     ==
 1785%       ...,
 1786%       verbatim(Lines, EnvIndent, Pre, RestLines),
 1787%       ...,
 1788%     ==
 1789%
 1790%   In addition, a verbatim environment may  simply be indented. The
 1791%   restrictions are described in the documentation.
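     %
     %   For example, an illustrative wiki fragment such as
     %
     %     ==
     %     ~~~{.prolog}
     %     hello :- writeln(hello).
     %     ~~~
     %     ==
     %
     %   is extracted as Indent-pre([class(code), ext(prolog)], Text),
     %   where Text holds the enclosed lines.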
 1792
 1793verbatim(Lines, _,
 1794         Indent-pre([class(code), ext(Ext)],Pre),
 1795         RestLines) :-
 1796    skip_empty_lines(Lines, [Indent-FenceLine|CodeLines]),
 1797    verbatim_fence(FenceLine, Fence, Ext),
 1798    verbatim_body(CodeLines, Indent, [10|PreCodes], [],
 1799                  [Indent-Fence|RestLines]),
 1800    !,
 1801    atom_codes(Pre, PreCodes).
 1802verbatim([_-[],Indent-Line|Lines], EnvIndent,
 1803         Indent-pre(class(code),Pre),
 1804         RestLines) :-
 1805    EnvIndent >= 0,
 1806    Indent >= EnvIndent+4, Indent =< EnvIndent+8,
 1807    valid_verbatim_opening(Line),
 1808    indented_verbatim_body([Indent-Line|Lines], Indent,
 1809                           CodeLines, RestLines),
 1810    !,
 1811    lines_code_text(CodeLines, Indent, [10|PreCodes]),
 1812    atom_codes(Pre, PreCodes).
 1813
 1814verbatim_body(Lines, _, PreT, PreT, Lines).
 1815verbatim_body([I-L|Lines], Indent, [10|Pre], PreT, RestLines) :-
 1816    PreI is I - Indent,
 1817    phrase(pre_indent(PreI), Pre, PreT0),
 1818    verbatim_line(L, PreT0, PreT1),
 1819    verbatim_body(Lines, Indent, PreT1, PreT, RestLines).
 1820
 1821verbatim_fence(Line, Fence, '') :-
 1822    Line == [0'=,0'=],
 1823    !,
 1824    Fence = Line.
 1825verbatim_fence(Line, Fence, Ext) :-
 1826    tilde_fence(Line, Fence, 0, Ext).
 1827verbatim_fence(Line, Fence, Ext) :-
 1828    md_fence(Line, Fence, 0, Ext).
 1829
 1830tilde_fence([0'~|T0], [0'~|F0], C0, Ext) :-
 1831    !,
 1832    C1 is C0+1,
 1833    tilde_fence(T0, F0, C1, Ext).
 1834tilde_fence(List, [], C, Ext) :-
 1835    C >= 3,
 1836    (   List == []
 1837    ->  Ext = ''
 1838    ;   phrase(tilde_fence_ext(ExtCodes), List)
 1839    ->  atom_codes(Ext, ExtCodes)
 1840    ).
 1841
 1842%!  tilde_fence_ext(-Ext)// is semidet.
 1843%
 1844%   Detect ```{.prolog} (Doxygen) or ```{prolog} (GitHub)
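     %
     %   An illustrative check (backquotes denote code lists):
     %
     %     ==
     %     ?- phrase(tilde_fence_ext(`prolog`), `{.prolog}`).
     %     true.
     %     ==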
 1845
 1846tilde_fence_ext(Ext) -->
 1847    "{.", !, alphas(Ext), "}".
 1848tilde_fence_ext(Ext) -->
 1849    "{", alphas(Ext), "}".
 1850
 1851md_fence([0'`|T0], [0'`|F0], C0, Ext) :-
 1852    !,
 1853    C1 is C0+1,
 1854    md_fence(T0, F0, C1, Ext).
 1855md_fence(List, [], C, Ext) :-
 1856    C >= 3,
 1857    (   List == []
 1858    ->  Ext = ''
 1859    ;   phrase(md_fence_ext(ExtCodes), List),
 1860        atom_codes(Ext, ExtCodes)
 1861    ).
 1862
 1863% Also support Doxygen's curly bracket notation.
 1864md_fence_ext(Ext) -->
 1865    tilde_fence_ext(Ext),
 1866    !.
 1867% In Markdown language names appear without brackets.
 1868md_fence_ext(Ext) -->
 1869    alphas(Ext).
 1870
 1871%!  indented_verbatim_body(+Lines, +Indent, -CodeLines, -RestLines)
 1872%
 1873%   Takes more verbatim lines. The input   ends  with the first line
 1874%   that is indented less than Indent. There cannot be more than one
 1875%   consecutive empty line in the verbatim body.
 1876
 1877indented_verbatim_body([I-L|T0], Indent, [I-L|T], RestLines) :-
 1878    L \== [], I >= Indent,
 1879    !,
 1880    indented_verbatim_body(T0, Indent, T, RestLines).
 1881indented_verbatim_body([I0-[],I-L|T0], Indent, [I0-[],I-L|T], RestLines) :-
 1882    I >= Indent,
 1883    valid_verbatim_opening(L),
 1884    indented_verbatim_body(T0, Indent, T, RestLines).
 1885indented_verbatim_body(Lines, _, [], Lines).
 1886
 1887%!  valid_verbatim_opening(+Line) is semidet.
 1888%
 1889%   Tests that the line does not look like a list item or a table row.
 1890
 1891valid_verbatim_opening([0'||_]) :- !, fail.
 1892valid_verbatim_opening(Line) :-
 1893    Line \== [],
 1894    \+ ( phrase(line_tokens(Tokens), Line),
 1895         list_item_prefix(_Type, Tokens, _Rest)
 1896       ).
 1897
 1898%!  lines_code_text(+Lines, +Indent, -Codes) is det.
 1899%
 1900%   Extract the actual code content from a list of line structures.
 1901
 1902lines_code_text([], _, []).
 1903lines_code_text([_-[]|T0], Indent, [10|T]) :-
 1904    !,
 1905    lines_code_text(T0, Indent, T).
 1906lines_code_text([I-Line|T0], Indent, [10|T]) :-
 1907    PreI is I-Indent,
 1908    phrase(pre_indent(PreI), T, T1),
 1909    verbatim_line(Line, T1, T2),
 1910    lines_code_text(T0, Indent, T2).
 1911
 1912
 1913%!  pre_indent(+Indent)// is det.
 1914%
 1915%   Insert Indent leading spaces.  Note we cannot use tabs as these
 1916%   are not expanded by the HTML <pre> element.
 1917
 1918pre_indent(N) -->
 1919    { N > 0,
 1920      !,
 1921      N2 is N - 1
 1922    }, " ",
 1923    pre_indent(N2).
 1924pre_indent(_) -->
 1925    "".
 1926
 1927verbatim_line(Line, Pre, PreT) :-
 1928    append(Line, PreT, Pre).
 1929
 1930
 1931                 /*******************************
 1932                 *            SUMMARY           *
 1933                 *******************************/
 1934
 1935%!  summary_from_lines(+Lines:lines, -Summary:list(codes)) is det.
 1936%
 1937%   Produce a summary for Lines. Similar  to JavaDoc, the summary is
 1938%   defined as the first sentence of the documentation. In addition,
 1939%   a sentence is also ended by an  empty   line  or  the end of the
 1940%   comment.
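     %
     %   An illustrative query (the resulting code list is converted to
     %   an atom for readability):
     %
     %     ==
     %     ?- summary_from_lines([1-`Makes tea.  Slowly and carefully.`],
     %                           Codes),
     %        atom_codes(Summary, Codes).
     %     Summary = 'Makes tea.'
     %     ==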
 1941
 1942summary_from_lines(Lines, Sentence) :-
 1943    skip_empty_lines(Lines, Lines1),
 1944    summary2(Lines1, Sentence0),
 1945    end_sentence(Sentence0, Sentence).
 1946
 1947summary2(_, Sentence) :-
 1948    Sentence == [],
 1949    !.              % we finished our sentence
 1950summary2([], []) :- !.
 1951summary2([_-[]|_], []) :- !.            % empty line
 1952summary2([_-[0'@|_]|_], []) :- !.       % keyword line
 1953summary2([_-L0|Lines], Sentence) :-
 1954    phrase(sentence(Sentence, Tail), L0, _),
 1955    summary2(Lines, Tail).
 1956
 1957sentence([C,End], []) -->
 1958    [C,End],
 1959    { \+ code_type(C, period),
 1960      code_type(End, period)                % ., !, ?
 1961    },
 1962    space_or_eos,
 1963    !.
 1964sentence([0' |T0], T) -->
 1965    space,
 1966    !,
 1967    ws,
 1968    sentence(T0, T).
 1969sentence([H|T0], T) -->
 1970    [H],
 1971    sentence(T0, T).
 1972sentence([0' |T], T) -->                % '
 1973    eos.
 1974
 1975space_or_eos -->
 1976    [C],
 1977    !,
 1978    {code_type(C, space)}.
 1979space_or_eos -->
 1980    eos.
 1981
 1982%!  skip_empty_lines(+LinesIn, -LinesOut) is det.
 1983%
 1984%   Remove empty lines from the start of the input.  Note that
 1985%   this is used both to process character and token data.
 1986
 1987skip_empty_lines([], []).
 1988skip_empty_lines([_-[]|Lines0], Lines) :-
 1989    !,
 1990    skip_empty_lines(Lines0, Lines).
 1991skip_empty_lines(Lines, Lines).
 1992
 1993end_sentence([], []).
 1994end_sentence([0'\s], [0'.]) :- !.
 1995end_sentence([H|T0], [H|T]) :-
 1996    end_sentence(T0, T).
 1997
 1998
 1999                 /*******************************
 2000                 *        CREATE LINES          *
 2001                 *******************************/
 2002
 2003%!  indented_lines(+Text:list(codes), +Prefixes:list(codes),
 2004%!                 -Lines:list) is det.
 2005%
 2006%   Extract a list of lines from Text, stripping leading blanks and
 2007%   any prefix from Prefixes.  Each line is a term Indent-Codes, where
 2008%   Indent specifies the line_position of the real text of the line.
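     %
     %   An illustrative query (code lists shown in backquote notation):
     %
     %     ==
     %     ?- indented_lines(`%  one\n%    two\n`, ["%"], Lines).
     %     Lines = [3-`one`, 5-`two`].
     %     ==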
 2009
 2010indented_lines(Comment, Prefixes, Lines) :-
 2011    must_be(codes, Comment),
 2012    phrase(split_lines(Prefixes, Lines), Comment),
 2013    !.
 2014
 2015split_lines(_, []) -->
 2016    end_of_comment.
 2017split_lines(Prefixes, [Indent-L1|Ls]) -->
 2018    take_prefix(Prefixes, 0, Indent0),
 2019    white_prefix(Indent0, Indent),
 2020    take_line(L1),
 2021    split_lines(Prefixes, Ls).
 2022
 2023
 2024%!  end_of_comment//
 2025%
 2026%   Succeeds if we hit the end of the comment.
 2027%
 2028%   @bug    %*/ will be seen as the end of the comment.
 2029
 2030end_of_comment -->
 2031    eos.
 2032end_of_comment -->
 2033    ws, stars, "*/".
 2034
 2035stars --> [].
 2036stars --> "*", !, stars.
 2037
 2038
 2039%!  take_prefix(+Prefixes:list(codes), +Indent0:int, -Indent:int)// is det.
 2040%
 2041%   Get the leading characters  from  the   input  and  compute  the
 2042%   line-position at the end of the leading characters.
 2043
 2044take_prefix(Prefixes, I0, I) -->
 2045    { member(Prefix, Prefixes),
 2046      string_codes(Prefix, PrefixCodes)
 2047    },
 2048    prefix(PrefixCodes),
 2049    !,
 2050    { string_update_linepos(PrefixCodes, I0, I) }.
 2051take_prefix(_, I, I) -->
 2052    [].
 2053
 2054prefix([]) --> [].
 2055prefix([H|T]) --> [H], prefix(T).
 2056
 2057white_prefix(I0, I) -->
 2058    [C],
 2059    {  code_type(C, white),
 2060       !,
 2061       update_linepos(C, I0, I1)
 2062    },
 2063    white_prefix(I1, I).
 2064white_prefix(I, I) -->
 2065    [].
 2066
 2067%!  string_update_linepos(+Codes, +Pos0, -Pos) is det.
 2068%
 2069%   Update line-position after adding Codes at Pos0.
 2070
 2071string_update_linepos([], I, I).
 2072string_update_linepos([H|T], I0, I) :-
 2073    update_linepos(H, I0, I1),
 2074    string_update_linepos(T, I1, I).
 2075
 2076%!  update_linepos(+Code, +Pos0, -Pos) is det.
 2077%
 2078%   Update line-position after adding Code.
 2079%
 2080%   @tbd    Currently assumes tab-width of 8.
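     %
     %   For example, a tab at column 3 advances to the next multiple
     %   of eight:
     %
     %     ==
     %     ?- I is (3 \/ 7) + 1.
     %     I = 8.
     %     ==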
 2081
 2082update_linepos(0'\t, I0, I) :-
 2083    !,
 2084    I is (I0\/7)+1.
 2085update_linepos(0'\b, I0, I) :-
 2086    !,
 2087    I is max(0, I0-1).
 2088update_linepos(0'\r, _, 0) :- !.
 2089update_linepos(0'\n, _, 0) :- !.
 2090update_linepos(_, I0, I) :-
 2091    I is I0 + 1.
 2092
 2093%!  take_line(-Line:codes)// is det.
 2094%
 2095%   Take  a  line  from  the  input.   Line  does  not  include  the
 2096%   terminating \r or \n character(s), nor trailing whitespace.
 2097
 2098take_line([]) -->
 2099    "\r\n",
 2100    !.                      % DOS file
 2101take_line([]) -->
 2102    "\n",
 2103    !.                        % Unix file
 2104take_line(Line) -->
 2105    [H], { code_type(H, white) },
 2106    !,
 2107    take_white(White, WT),
 2108    (   nl
 2109    ->  { Line = [] }
 2110    ;   { Line = [H|White] },
 2111        take_line(WT)
 2112    ).
 2113take_line([H|T]) -->
 2114    [H],
 2115    !,
 2116    take_line(T).
 2117take_line([]) -->                       % end of string
 2118    [].
 2119
 2120take_white([H|T0], T) -->
 2121    [H],  { code_type(H, white) },
 2122    !,
 2123    take_white(T0, T).
 2124take_white(T, T) -->
 2125    [].
 2126
 2127%!  normalise_indentation(+LinesIn, -LinesOut) is det.
 2128%
 2129%   Re-normalise the indentation, such that the left-most line is at
 2130%   zero.  Note that we skip empty lines in the computation.
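     %
     %   An illustrative query (code lists shown in backquote notation;
     %   note that the empty line does not influence the computation):
     %
     %     ==
     %     ?- normalise_indentation([4-`a`, 0-[], 6-`b`], Lines).
     %     Lines = [0-`a`, 0-[], 2-`b`].
     %     ==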
 2131
 2132normalise_indentation(Lines0, Lines) :-
 2133    skip_empty_lines(Lines0, Lines1),
 2134    Lines1 = [I0-_|Lines2],
 2135    !,
 2136    smallest_indentation(Lines2, I0, Subtract),
 2137    (   Subtract == 0
 2138    ->  Lines = Lines0
 2139    ;   maplist(substract_indent(Subtract), Lines0, Lines)
 2140    ).
 2141normalise_indentation(Lines, Lines).
 2142
 2143smallest_indentation([], I, I).
 2144smallest_indentation([_-[]|T], I0, I) :-
 2145    !,
 2146    smallest_indentation(T, I0, I).
 2147smallest_indentation([X-_|T], I0, I) :-
 2148    I1 is min(I0, X),
 2149    smallest_indentation(T, I1, I).
 2150
 2151substract_indent(Subtract, I0-L, I-L) :-
 2152    I is max(0,I0-Subtract).
 2153
 2154
 2155                 /*******************************
 2156                 *             MISC             *
 2157                 *******************************/
 2158
 2159%!  strip_leading_par(+Dom0, -Dom) is det.
 2160%
 2161%   Remove the leading paragraph for  environments where a paragraph
 2162%   is not required.
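     %
     %   An illustrative query:
     %
     %     ==
     %     ?- strip_leading_par([p([a, b]), c], DOM).
     %     DOM = [a, b, c].
     %     ==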
 2163
 2164strip_leading_par([p(C)|T], L) :-
 2165    !,
 2166    append(C, T, L).
 2167strip_leading_par(L, L).
 2168
 2169
 2170                 /*******************************
 2171                 *           DCG BASICS         *
 2172                 *******************************/
 2173
 2174%!  ws// is det
 2175%
 2176%   Eagerly skip layout characters
 2177
 2178ws -->
 2179    [C], {code_type(C, space)},
 2180    !,
 2181    ws.
 2182ws -->
 2183    [].
 2184
 2185%       space// is det
 2186%
 2187%       True if the next code is layout.
 2188
 2189space -->
 2190    [C],
 2191    {code_type(C, space)}.
 2192
 2193%!  nl//
 2194%
 2195%   Get end-of-line
 2196
 2197nl -->
 2198    "\r\n",
 2199    !.
 2200nl -->
 2201    "\n".
 2202
 2203%!  peek(H)//
 2204%
 2205%   True if next token is H without eating it.
 2206
 2207peek(H, L, L) :-
 2208    L = [H|_].
 2209
 2210peek(H1, H2, L, L) :-
 2211    L = [H1, H2|_].
 2212
 2213%!  tokens(-Tokens:list)// is nondet.
 2214%!  tokens(+Max, -Tokens:list)// is nondet.
 2215%
 2216%   Defensively take tokens from the input.  Backtracking takes more
 2217%   tokens.  Do not include structure terms.
 2218
 2219tokens([]) --> [].
 2220tokens([H|T]) --> token(H), tokens(T).
 2221
 2222tokens(_, []) --> [].
 2223tokens(C, [H|T]) --> token(H), {succ(C1, C)}, tokens(C1, T).
 2224
 2225%!  tokens_no_whitespace(-Tokens:list(atom))// is nondet.
 2226%
 2227%   Defensively take tokens from the  input. Backtracking takes more
 2228%   tokens.  Tokens  cannot  include  whitespace.  Word  tokens  are
 2229%   returned as their represented words.
 2230
 2231tokens_no_whitespace([]) -->
 2232    [].
 2233tokens_no_whitespace([Word|T]) -->
 2234    [ w(Word) ],
 2235    !,
 2236    tokens_no_whitespace(T).
 2237tokens_no_whitespace([H|T]) -->
 2238    [H],
 2239    { \+ space_token(H) },
 2240    tokens_no_whitespace(T).
 2241
 2242token(Token) -->
 2243    [Token],
 2244    { token(Token) }.
 2245
 2246token(w(_)) :- !.
 2247token(Token) :- atom(Token).
 2248
 2249%!  limit(+Count, :Rule)//
 2250%
 2251%   As limit/2, but for grammar rules.
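     %
     %   An illustrative query, using tokens//1 defined above:
     %
     %     ==
     %     ?- phrase(limit(2, tokens(Ts)), [w(a), w(b), w(c)], Rest).
     %     Ts = [], Rest = [w(a), w(b), w(c)] ;
     %     Ts = [w(a)], Rest = [w(b), w(c)].
     %     ==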
 2252
 2253:- meta_predicate limit(+,2,?,?).
 2255limit(Count, Rule, Input, Rest) :-
 2256    Count > 0,
 2257    State = count(0),
 2258    call(Rule, Input, Rest),
 2259    arg(1, State, N0),
 2260    N is N0+1,
 2261    (   N =:= Count
 2262    ->  !
 2263    ;   nb_setarg(1, State, N)
 2264    ).
 2265
 2266
 2267                 /*******************************
 2268                 *           MESSAGES           *
 2269                 *******************************/
 2270
 2271:- multifile
    prolog:message//1.
 2274prolog:message(pldoc(deprecated_tag(Name, Tag))) -->
 2275    [ 'PlDoc: Deprecated tag @~w (use @~w)'-[Name, Tag]
 2276    ].
 2277prolog:message(pldoc(unknown_tag(Name))) -->
 2278    [ 'PlDoc: unknown tag @~w'-[Name]
 2279    ].