View source with raw comments or as raw
    1/*  Part of SWI-Prolog
    2
    3    Author:        Jan Wielemaker
    4    E-mail:        J.Wielemaker@vu.nl
    5    WWW:           http://www.swi-prolog.org
    6    Copyright (c)  2007-2018, University of Amsterdam
    7    All rights reserved.
    8
    9    Redistribution and use in source and binary forms, with or without
   10    modification, are permitted provided that the following conditions
   11    are met:
   12
   13    1. Redistributions of source code must retain the above copyright
   14       notice, this list of conditions and the following disclaimer.
   15
   16    2. Redistributions in binary form must reproduce the above copyright
   17       notice, this list of conditions and the following disclaimer in
   18       the documentation and/or other materials provided with the
   19       distribution.
   20
   21    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   22    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   23    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
   24    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
   25    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   26    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
   27    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   28    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
   29    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   30    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
   31    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   32    POSSIBILITY OF SUCH DAMAGE.
   33*/
   34
   35:- module(rdf_history,
   36          [ rdfh_transaction/1,         % :Goal
   37            rdfh_assert/3,              % +S,+P,+O
   38            rdfh_retractall/3,          % +S,+P,+O
   39            rdfh_update/3,              % +S[->NS],+P[->NP],+O[->[NO]
   40            rdfh_db_transaction/3,      % ?DB, +Condition, ?Transaction
   41            rdfh_triple_transaction/2,  % +Triple, -Transaction
   42            rdfh_transaction_member/2   % ?Action, +Transaction
   43          ]).   44:- use_module(library('http/http_session')).   45:- use_module(library(lists)).   46:- use_module(library(record)).   47:- use_module(library(error)).   48:- use_module(library(debug)).   49:- use_module(library('semweb/rdf_persistency')).   50:- use_module(library('semweb/rdf_db')).

RDF Persistent store change history

This module deals with accessing the journal files of the RDF persistency layer to get insight in the provenance and history of the RDF database. It is designed for Wiki-like collaborative editing of an RDF graph. We make the following assumptions:

Users are identified using a URI, typically an OpenID (http://openid.net/) Triples created by a user are added to a named graph identified by the URI of the user. Changes are grouped using rdf_transaction(Goal, log(Message, User)) The number that is associated with the named graph of a triple (normally expressing the line number in the source) is used to store the time-stamp. Although this information is redundant (the time stamp is the same as for the transaction), it allows for binary search through the history file for the enclosing transaction.

author
- Jan Wielemaker */
To be done
- Cleanup thoughts on delete and update.
   75                 /*******************************
   76                 *         DECLARATIONS         *
   77                 *******************************/
   78
   79:- module_transparent
   80    rdfh_transaction/1.   81
   82:- rdf_meta
   83    rdfh_assert(r,r,o),
   84    rdfh_retractall(r,r,o),
   85    rdfh_update(t,t,t).   86
   87:- multifile
   88    rdfh_hook/1.   89
   90:- record
   91    rdf_transaction(id:integer,
   92                    nesting:integer,
   93                    time:number,
   94                    message,
   95                    actions:list,
   96                    other_graphs:list).   97
   98
   99                 /*******************************
  100                 *         MODIFICATIONS        *
  101                 *******************************/
 rdfh_transaction(:Goal) is semidet
Run Goal using rdf_transaction/2, using information from the HTTP layer to provide OpenID and session-id.
  108rdfh_transaction(Goal) :-
  109    rdfh_user(User),
  110    transaction_context(Context),
  111    rdf_transaction(Goal, log(rdfh([user(User)|Context]), User)).
 rdfh_assert(+S, +P, +O) is det
Assert a triple, adding current user and time to the triple context.
  119rdfh_assert(S,P,O) :-
  120    (   rdf_active_transaction(log(rdfh(_), User))
  121    ->  rdfh_time(Time),
  122        rdf_assert(S,P,O,User:Time)
  123    ;   throw(error(permission_error(assert, triple, rdf(S,P,O)),
  124                    context(_, 'No rdfh_transaction/1')))
  125    ).
 rdfh_retractall(+S, +P, +O) is det
Retract triples that match {S,P,O}. Note that all matching triples are added to the journal, so we can undo the action as well as report on retracted triples, even if multiple are retracted at the same time.

One of the problems we are faced with is that a retract action goes into the journal of the user whose triple is retracted, which may or may not be the one who performed the action.

  139rdfh_retractall(S,P,O) :-
  140    (   rdf_active_transaction(log(rdfh(_), _User))
  141    ->  rdf_retractall(S,P,O)
  142    ;   throw(error(permission_error(retract, triple, rdf(S,P,O)),
  143                    context(_, 'No rdfh_transaction/1')))
  144    ).
 rdfh_update(+S, +P, +O) is det
More tricky stuff, replacing a triple by another. Typically this will be changing the predicate or object. Provenance info should move the new triple to the user making the change, surely if the object is changed. If the predicate is changed to a related predicate, this actually becomes less obvious.

Current simple-minded approach is to turn an update into a retract and assert. The S,P,O specifications are either a ground value or of the form Old -> New. Here is an example:

rdfh_update(Work, Style, wn:oldstyle -> wn:newstyle)
  163rdfh_update(S,P,O) :-
  164    (   rdf_active_transaction(log(rdfh(_), User))
  165    ->  update(S,P,O, rdf(RS, RP, RO), rdf(AS, AP, AO)),
  166        must_be(ground, RS),
  167        must_be(ground, RP),
  168        must_be(ground, RO),
  169        rdfh_time(Time),
  170        rdf_retractall(RS, RP, RO),
  171        rdf_assert(AS, AP, AO, User:Time)
  172    ;   throw(error(permission_error(retract, triple, rdf(S,P,O)),
  173                    context(_, 'No rdfh_transaction/1')))
  174    ).
  175
  176update(Ss, Ps, Os, rdf(S0, P0, O0), rdf(S,P,O)) :-
  177    update(Ss, S0, S),
  178    update(Ps, P0, P),
  179    update(Os, O0, O).
  180
  181update(From->To, From, To) :- !.
  182update(Value, Value, Value).
 transaction_context(-Term) is det
Context to pass with an RDF transaction. Note that we pass the user. We don't need this for simple additions, but we do need it to track deletions.
  191transaction_context(Context) :-
  192    (   rdfh_session(Session)
  193    ->  Context = [session(Session)]
  194    ;   Context = []
  195    ).
 rdfh_session(-Session) is semidet
Session is a (ground) identifier for the current session.
  201rdfh_session(Session) :-
  202    rdfh_hook(session(Session)),
  203    !.
  204rdfh_session(Session) :-
  205    catch(http_session_id(Session), _, fail).
 rdfh_user(-URI) is det
Get user-id of current session.
To be done
- Make hookable, so we can use the SeRQL user/openid hooks
  214rdfh_user(User) :-
  215    rdfh_hook(user(User)),
  216    !.
  217rdfh_user(OpenId) :-
  218    http_session_data(openid(OpenId)).
 rdfh_time(-Time:integer) is det
Get time stamp as integer. Second resolution is enough, and avoids rounding problems associated with floats.
  225rdfh_time(Seconds) :-
  226    get_time(Now),
  227    Seconds is round(Now).
  228
  229
  230                 /*******************************
  231                 *       EXAMINE HISTORY        *
  232                 *******************************/
 rdfh_triple_transaction(+Triple:rdf(S,P,O), -Transaction) is nondet
True if the (partial) Triple is modified in Transaction.
  238rdfh_triple_transaction(rdf(S,P,O), Transaction) :-
  239    rdf(S,P,O,DB:Time),
  240    After is Time - 1,
  241    rdfh_db_transaction(DB, after(After), Transaction),
  242    rdfh_transaction_member(assert(S,P,O,Time), Transaction).
 rdfh_db_transaction(?DB, +Condition, ?Transaction) is nondet
True if Transaction satisfying Condition was executed on DB. Condition is one of:
true
Always true, returns all transactions.
id(Id)
Specifies the identifier of the transaction. Only makes sense if DB is specified as transaction identifiers are local to each DB.
after(Time)
True if transaction is executed at or after Time.
To be done
- More conditions (e.g. before(Time)).
  260rdfh_db_transaction(DB, true, Transaction) :-
  261    !,
  262    rdf_journal_file(DB, Journal),
  263    journal_transaction(Journal, Transaction).
  264rdfh_db_transaction(DB, id(Id), Transaction) :-
  265    !,
  266    must_be(atom, DB),
  267    rdf_journal_file(DB, Journal),
  268    open_journal(Journal, Fd),
  269    call_cleanup((seek_journal(Fd, id(Id)),
  270                  read_transaction(Fd, Transaction)),
  271                 close(Fd)).
  272rdfh_db_transaction(DB, Condition, Transaction) :-
  273    !,
  274    valid_condition(Condition),
  275    rdf_journal_file(DB, Journal),
  276    open_journal(Journal, Fd),
  277    seek_journal(Fd, Condition),
  278    stream_transaction(Fd, Transaction).
  279
  280valid_condition(Var) :-
  281    var(Var),
  282    !,
  283    instantiation_error(Var).
  284valid_condition(after(Time)) :-
  285    !,
  286    must_be(number, Time).
  287valid_condition(Cond) :-
  288    type_error(condition, Cond).
 open_journal(+File, -Stream) is det
Open a journal file. Journal files are always UTF-8 encoded.
  294open_journal(JournalFile, Fd) :-
  295    open(JournalFile, read, Fd, [encoding(utf8)]).
 journal_transaction(+JournalFile, ?Transaction) is nondet
True if Transaction is a transaction in JournalFile,
  301journal_transaction(JournalFile, Transaction) :-
  302    open_journal(JournalFile, Fd),
  303    stream_transaction(Fd, Transaction).
  304
  305stream_transaction(JFD, Transaction) :-
  306    call_cleanup(read_transaction(JFD, Transaction), close(JFD)).
  307
  308read_transaction(In, Transaction) :-
  309    repeat,
  310       read(In, T0),
  311    (   T0 == end_of_file
  312    ->  !, fail
  313    ;   transaction(T0, In, T),     % transaction/3 is not steadfast
  314        T = Transaction
  315    ).
  316
  317transaction(begin(Id, Nest, Time, Msg), In,
  318            rdf_transaction(Id, Nest, Time, Msg, Actions, Others)) :-
  319    !,
  320    read(In, T2),
  321    read_transaction_actions(T2, Id, In, Actions, Others).
  322transaction(start(_), _, _) :- !, fail. % Open journal
  323transaction(end(_), _, _) :- !, fail.   % Close journal
  324transaction(Action, _, Action).         % Action outside transaction?
  325
  326read_transaction_actions(end(Id, _, Others), Id, _, [], Others) :- !.
  327read_transaction_actions(end_of_file, _, _, [], []) :- !. % TBD: Incomplete transaction (error)
  328read_transaction_actions(Action, Id, In, Actions, Others) :-
  329    ignore_in_transaction(Action),
  330    !,
  331    read(In, T2),
  332    read_transaction_actions(T2, Id, In, Actions, Others).
  333read_transaction_actions(Action, Id, In, [Action|Actions], Others) :-
  334    read(In, T2),
  335    read_transaction_actions(T2, Id, In, Actions, Others).
  336
  337ignore_in_transaction(start(_)).
  338ignore_in_transaction(end(_)).
  339ignore_in_transaction(begin(_,_,_,_)).
  340ignore_in_transaction(end(_,_,_)).
 seek_journal(+Fd:stream, +Spec) is semidet
See an open journal descriptor to the start of a transaction specified by Spec. Spec is one of:
after(Time)
First transaction at or after Time. Fails if there are no transactions after time.
id(Id)
Start of transaction labeled with given Id. Fails if there is no transaction labeled Id.

The implementation relies on the incrementing identifier numbers and time-stamps.

  358seek_journal(Fd, Spec) :-
  359    stream_property(Fd, file_name(File)),
  360    size_file(File, Size),
  361    Here is Size//2,
  362    Last = last(-),
  363    (   is_after_spec(Spec)
  364    ->  (   bsearch_journal(Fd, 0, Here, Size, Spec, Last)
  365        ->  true
  366        ;   arg(1, Last, StartOfTerm),
  367            StartOfTerm \== (-),
  368            seek(Fd, StartOfTerm, bof, _)
  369        )
  370    ;   bsearch_journal(Fd, 0, Here, Size, Spec, Last)
  371    ).
  372
  373is_after_spec(after(_Time)).
 bsearch_journal(+Fd, +Start, +Here, +End, +Spec, !Last) is semidet
Perform a binary search in the journal opened as Fd.
  379bsearch_journal(Fd, Start, Here, End, Spec, Last) :-
  380    start_of_transaction(Fd, Here, StartOfTerm, Begin),
  381    !,
  382    compare_transaction(Spec, Begin, Diff),
  383    (   Diff == (=)
  384    ->  seek(Fd, StartOfTerm, bof, _)
  385    ;   Diff == (<)
  386    ->  NewHere is Start+(Here-Start)//2,
  387        NewHere < Here,
  388        nb_setarg(1, Last, StartOfTerm),
  389        bsearch_journal(Fd, Start, NewHere, Here, Spec, Last)
  390    ;   NewHere is StartOfTerm+(End-StartOfTerm)//2,
  391        NewHere > StartOfTerm,
  392        bsearch_journal(Fd, StartOfTerm, NewHere, End, Spec, Last)
  393    ).
  394bsearch_journal(Fd, Start, Here, _End, Spec, Last) :-
  395    NewHere is Start+(Here-Start)//2,
  396    NewHere < Here,
  397    bsearch_journal(Fd, Start, NewHere, Here, Spec, Last).
  398
  399compare_transaction(id(Id), begin(Id2,_,_,_), Diff) :-
  400    !,
  401    compare(Diff, Id, Id2).
  402compare_transaction(after(Time), begin(_,_,T,_), Diff) :-
  403    !,
  404    compare(Diff, Time, T).
 start_of_transaction(+Fd, +From, -Start, -Term) is semidet
Term is the start term of the first transaction after byte position From. Fails if no transaction can be found after From.
  411start_of_transaction(Fd, From, Start, Term) :-
  412    seek(Fd, From, bof, _),
  413    skip(Fd, 10),
  414    repeat,
  415        seek(Fd, 0, current, Start),
  416        read(Fd, Term),
  417        (   transaction_start(Term)
  418        ->  !
  419        ;   Term == end_of_file
  420        ->  !, fail
  421        ;   fail
  422        ).
  423
  424transaction_start(begin(_Id,_Nest,_Time,_Message)).
 rdfh_transaction_member(Action, Transaction) is nondet
True if Action is an action in Transaction.
  430rdfh_transaction_member(Action, Transaction) :-
  431    rdf_transaction_actions(Transaction, Actions),
  432    member(Action, Actions)