35
   36:- module(rdf,
   37          [ load_rdf/2,                    38            load_rdf/3,                    39            xml_to_rdf/3,                  40            process_rdf/3                  41          ]).   42
   43:- meta_predicate
   44    load_rdf(+, -, :),
   45    process_rdf(+, :, :).   46
   47:- autoload(library(lists),[select/3,append/3]).   48:- autoload(library(option),[meta_options/3,option/3]).   49:- autoload(library(rdf_parser),
   50	    [ make_rdf_state/3, xml_to_plrdf/3, rdf_name_space/1,
   51              rdf_modify_state/3, element_to_plrdf/3
   52	    ]).   53:- autoload(library(rdf_triple),
   54	    [rdf_start_file/2,rdf_end_file/1,rdf_triples/2]).   55:- autoload(library(sgml),
   56	    [ load_structure/3, new_sgml_parser/2, set_sgml_parser/2,
   57	      open_dtd/3, xml_quote_attribute/2, sgml_parse/2,
   58	      free_sgml_parser/1, get_sgml_parser/2
   59	    ]).
   97load_rdf(File, Triples) :-
   98    load_rdf(File, Triples, []).
   99
  100load_rdf(File, Triples, M:Options0) :-
  101    entity_options(Options0, EntOptions, Options1),
  102    meta_options(load_meta_option, M:Options1, Options),
  103    init_ns_collect(Options, NSList),
  104    load_structure(File,
  105                   [ RDFElement
  106                   ],
  107                   [ dialect(xmlns),
  108                     space(sgml),
  109                     call(xmlns, rdf:on_xmlns)
  110                   | EntOptions
  111                   ]),
  112    rdf_start_file(Options, Cleanup),
  113    call_cleanup(xml_to_rdf(RDFElement, Triples0, Options),
  114                 rdf_end_file(Cleanup)),
  115    exit_ns_collect(NSList),
  116    post_process(Options, Triples0, Triples).
  117
  118entity_options([], [], []).
  119entity_options([H|T0], Entities, Rest) :-
  120    (   H = entity(_,_)
  121    ->  Entities = [H|ET],
  122        entity_options(T0, ET, Rest)
  123    ;   Rest = [H|RT],
  124        entity_options(T0, Entities, RT)
  125    ).
  126
  127load_meta_option(convert_typed_literal).
  131xml_to_rdf(XML, Triples, Options) :-
  132    is_list(Options),
  133    !,
  134    make_rdf_state(Options, State, _),
  135    xml_to_plrdf(XML, RDF, State),
  136    rdf_triples(RDF, Triples).
  137xml_to_rdf(XML, BaseURI, Triples) :-
  138    atom(BaseURI),
  139    !,
  140    xml_to_rdf(XML, Triples, [base_uri(BaseURI)]).
  141
  142
  143                   146
  147post_process([], Triples, Triples).
  148post_process([expand_foreach(true)|T], Triples0, Triples) :-
  149    !,
  150    expand_each(Triples0, Triples1),
  151    post_process(T, Triples1, Triples).
  152post_process([_|T], Triples0, Triples) :-
  153    !,
  154    post_process(T, Triples0, Triples).
  155
  156
  157                   160
  161expand_each(Triples0, Triples) :-
  162    select(rdf(each(Container), Pred, Object),
  163           Triples0, Triples1),
  164    !,
  165    each_triples(Triples1, Container, Pred, Object, Triples2),
  166    expand_each(Triples2, Triples).
  167expand_each(Triples, Triples).
  168
  169each_triples([], _, _, _, []).
  170each_triples([H0|T0], Container, P, O,
  171             [H0, rdf(S,P,O)|T]) :-
  172    H0 = rdf(Container, rdf:A, S),
  173    member_attribute(A),
  174    !,
  175    each_triples(T0, Container, P, O, T).
  176each_triples([H|T0], Container, P, O, [H|T]) :-
  177    each_triples(T0, Container, P, O, T).
  178
  179member_attribute(A) :-
  180    sub_atom(A, 0, _, _, '_').        181
  182
  183                 
  221process_rdf(File, OnObject, M:Options0) :-
  222    is_list(Options0),
  223    !,
  224    entity_options(Options0, EntOptions, Options1),
  225    meta_options(load_meta_option, M:Options1, Options2),
  226    option(base_uri(BaseURI), Options2, ''),
  227    rdf_start_file(Options2, Cleanup),
  228    strip_module(OnObject, Module, Pred),
  229    b_setval(rdf_object_handler, Module:Pred),
  230    nb_setval(rdf_options, Options2),
  231    nb_setval(rdf_state, -),
  232    init_ns_collect(Options2, NSList),
  233    (   File = stream(In)
  234    ->  Source = BaseURI
  235    ;   is_stream(File)
  236    ->  In = File,
  237        option(graph(Source), Options2, BaseURI)
  238    ;   open(File, read, In, [type(binary)]),
  239        Close = In,
  240        Source = File
  241    ),
  242    new_sgml_parser(Parser, [dtd(DTD)]),
  243    def_entities(EntOptions, DTD),
  244    (   Source \== []
  245    ->  set_sgml_parser(Parser, file(Source))
  246    ;   true
  247    ),
  248    set_sgml_parser(Parser, dialect(xmlns)),
  249    set_sgml_parser(Parser, space(sgml)),
  250    do_process_rdf(Parser, In, NSList, Close, Cleanup, Options2).
  251process_rdf(File, BaseURI, OnObject) :-
  252    process_rdf(File, OnObject, [base_uri(BaseURI)]).
  253
  254def_entities([], _).
  255def_entities([entity(Name, Value)|T], DTD) :-
  256    !,
  257    def_entity(DTD, Name, Value),
  258    def_entities(T, DTD).
  259def_entities([_|T0], DTD) :-
  260    def_entities(T0, DTD).
  261
  262def_entity(DTD, Name, Value) :-
  263    open_dtd(DTD, [], Stream),
  264    xml_quote_attribute(Value, QValue),
  265    format(Stream, '<!ENTITY ~w "~w">~n', [Name, QValue]),
  266    close(Stream).
  267
  268
  269do_process_rdf(Parser, In, NSList, Close, Cleanup, Options) :-
  270    call_cleanup((   sgml_parse(Parser,
  271                                [ source(In),
  272                                  call(begin, on_begin),
  273                                  call(xmlns, on_xmlns)
  274                                | Options
  275                                ]),
  276                     exit_ns_collect(NSList)
  277                 ),
  278                 cleanup_process(Close, Cleanup, Parser)).
  279
  280cleanup_process(In, Cleanup, Parser) :-
  281    (   var(In)
  282    ->  true
  283    ;   close(In)
  284    ),
  285    free_sgml_parser(Parser),
  286    nb_delete(rdf_options),
  287    nb_delete(rdf_object_handler),
  288    nb_delete(rdf_state),
  289    nb_delete(rdf_nslist),
  290    rdf_end_file(Cleanup).
  291
  292on_begin(NS:'RDF', Attr, _) :-
  293    rdf_name_space(NS),
  294    !,
  295    nb_getval(rdf_options, Options),
  296    make_rdf_state(Options, State0, _),
  297    rdf_modify_state(Attr, State0, State),
  298    nb_setval(rdf_state, State).
  299on_begin(Tag, Attr, Parser) :-
  300    nb_getval(rdf_state, State),
  301    (   State == (-)
  302    ->  nb_getval(rdf_options, RdfOptions),
  303        (   memberchk(embedded(true), RdfOptions)
  304        ->  true
  305        ;   print_message(warning, rdf(unexpected(Tag, Parser)))
  306        )
  307    ;   get_sgml_parser(Parser, line(Start)),
  308        get_sgml_parser(Parser, file(File)),
  309        sgml_parse(Parser,
  310                   [ document(Content),
  311                     parse(content)
  312                   ]),
  313        b_getval(rdf_object_handler, OnTriples),
  314        element_to_plrdf(element(Tag, Attr, Content), Objects, State),
  315        rdf_triples(Objects, Triples),
  316        call(OnTriples, Triples, File:Start)
  317    ).
  325on_xmlns(NS, URL, _Parser) :-
  326    (   nb_getval(rdf_nslist, List),
  327        List = list(L0)
  328    ->  nb_linkarg(1, List, [NS=URL|L0])
  329    ;   true
  330    ).
  331
  332init_ns_collect(Options, NSList) :-
  333    (   option(namespaces(NSList), Options, -),
  334        NSList \== (-)
  335    ->  nb_setval(rdf_nslist, list([]))
  336    ;   nb_setval(rdf_nslist, -),
  337        NSList = (-)
  338    ).
  339
  340exit_ns_collect(NSList) :-
  341    (   NSList == (-)
  342    ->  true
  343    ;   nb_getval(rdf_nslist, list(NSList))
  344    ).
  345
  346
  347
  348                   351
  352:- multifile
  353    prolog:message/3.  354
  356
  357prolog:message(rdf(unparsed(Data))) -->
  358    { phrase(unparse_xml(Data), XML)
  359    },
  360    [ 'RDF: Failed to interpret "~s"'-[XML] ].
  361prolog:message(rdf(shared_blank_nodes(N))) -->
  362    [ 'RDF: Shared ~D blank nodes'-[N] ].
  363prolog:message(rdf(not_a_name(Name))) -->
  364    [ 'RDF: argument to rdf:ID is not an XML name: ~p'-[Name] ].
  365prolog:message(rdf(redefined_id(Id))) -->
  366    [ 'RDF: rdf:ID ~p: multiple definitions'-[Id] ].
  367prolog:message(rdf(unexpected(Tag, Parser))) -->
  368    { get_sgml_parser(Parser, file(File)),
  369      get_sgml_parser(Parser, line(Line))
  370    },
  371    [ 'RDF: ', url(File:Line), ': Unexpected element ~w'-[Tag] ].
  372
  373
  374                   377
  378unparse_xml([]) -->
  379    !,
  380    [].
  381unparse_xml([H|T]) -->
  382    !,
  383    unparse_xml(H),
  384    unparse_xml(T).
  385unparse_xml(Atom) -->
  386    { atom(Atom)
  387    },
  388    !,
  389    atom(Atom).
  390unparse_xml(element(Name, Attr, Content)) -->
  391    "<",
  392    identifier(Name),
  393    attributes(Attr),
  394    (   { Content == []
  395        }
  396    ->  "/>"
  397    ;   ">",
  398        unparse_xml(Content)
  399    ).
  400
  401attributes([]) -->
  402    [].
  403attributes([H|T]) -->
  404    attribute(H),
  405    attributes(T).
  406
  407attribute(Name=Value) -->
  408    " ",
  409    identifier(Name),
  410    "=",
  411    value(Value).
  412
  413identifier(NS:Local) -->
  414    !,
  415    "{", atom(NS), "}",
  416    atom(Local).
  417identifier(Local) -->
  418    atom(Local).
  419
  420atom(Atom, Text, Rest) :-
  421    atom_codes(Atom, Chars),
  422    append(Chars, Rest, Text).
  423
  424value(Value) -->
  425    { atom_codes(Value, Chars)
  426    },
  427    "\"",
  428    quoted(Chars),
  429    "\"".
  430
  431quoted([]) -->
  432    [].
  433quoted([H|T]) -->
  434    quote(H),
  435    !,
  436    quoted(T).
  437
  438quote(0'<) --> "<".
  439quote(0'>) --> ">".
  440quote(0'") --> """.
  441quote(0'&) --> "&".
  442quote(X)   --> [X].
  443
  444
  445                   448
  449:- multifile prolog:meta_goal/2.  450prolog:meta_goal(process_rdf(_,G,_), [G+2])
 
RDF/XML parser
This module parses RDF/XML documents. It defines two processing modes: load_rdf/2 and load_rdf/3, which process a document into a list of
rdf(S,P,O) terms, and process_rdf/3, which processes the input description-by-description and uses a callback to handle the triples.