/* This file is part of ClioPatria. Author: HTTP: http://e-culture.multimedian.nl/ GITWEB: http://gollem.science.uva.nl/git/ClioPatria.git GIT: git://gollem.science.uva.nl/home/git/ClioPatria.git GIT: http://gollem.science.uva.nl/home/git/ClioPatria.git Copyright: 2007, E-Culture/MultimediaN ClioPatria is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 2 of the License, or (at your option) any later version. ClioPatria is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with ClioPatria. If not, see . */ :- module(rdf_graph, [ new_search_graph/1, % -Graph search_graph_add_node/3, % !Graph, +Node, +Fields search_graph_add_edge/4, % !Graph, +From, +Pred, +To search_graph_add_edge/5, % !Graph, +From, +Pred, +To, +Weight search_graph_set_node_type/3, % !Graph, +Node, +Type search_graph_prune/2, % !Graph, +Nodes search_graph_prune/3, % !Graph, +Node, -New search_graph_drains/2, % +Graph, -Drains search_graph_sources/2, % +Graph, -Sources % Agenda search_graph_next_agenda/3, % !Graph, -Node, -Score search_graph_agenda/2, % +Graph, -Pairs % Queries search_graph_size/2, % +Graph, -NodeCount search_graph_node_score/3, % +Graph, +Node, -Score search_graph_node_type/3, % +Graph, +Node, -Type search_graph_nodes_score_list/2, % @Graph, -list(Node-Score) % RDF Query graph search_graph_rdf_graph/2, % +Graph, -Triples search_graph_rdf/4, % +Graph, ?S, ?P, ?O search_graph_rdfs/4 % +Graph, ?S, ?P, ?O ]). :- use_module(library(record)). :- use_module(library(rbtrees)). :- use_module(library(error)). :- use_module(library(lists)). :- use_module(library(option)). :- use_module(library(semweb/rdf_db)). /** Representing RDF search paths ---++ Introduction This module represents a search-graph through an RDF graph starting at one or more locations in the graph. This search-graph is essentially a subgraph of the original graph. As it is intended as a dynamically evolving structure during reasoning, it is represented as a pure Prolog term and therefore subject to backtracking and normal Prolog memory management. ---++ Representation The graph is represented as an RB tree from URI to a record node_data(Next, Previous, Score, Type) where Next and Previous are lists of p([fi](Predicate), Target). If f(P), the link is rdf(Prev, P, Next), if i(P), the link is rdf(Next, P, Prev). The score of the node indicates semantic distance, ranging from 0 (very far way) to 1 (very close). Finally, Type classifies the node as one of =start=, =node= or =target=. The graphs as a whole contains $ nodes : An rb_tree mapping node-id to a node_data term as described above. $ agenda : An rb_tree mapping score to a list of nodes with that score. $ size : Nunber of nodes (for statistical reasons). ---++ Issues ---+++ Shall we deal internally with owl:sameAs? ---+++ How to deal with owl property types? $ owl:inverseOf : $ owl:TransitiveProperty : $ owl:SymmetricProperty : @tbd No weight loss passing a blank node? */ :- record graph(nodes, agenda, size=0). :- record node_data(next=[], previous=[], score=1, type=node). %% new_search_graph(-Graph) % % Create a new search graph. The initial graph is empty. new_search_graph(Graph) :- rb_empty(Nodes), rb_empty(Agenda), make_graph([nodes(Nodes), agenda(Agenda)], Graph). %% search_graph_add_node(!Graph, +Node, +Fields) % % Add a (start-)node to the graph search_graph_add_node(Graph, Node, Fields) :- graph_nodes(Graph, Assoc0), make_node_data(Fields, NodeData), rb_insert(Assoc0, Node, NodeData, Assoc1), set_nodes_of_graph(Assoc1, Graph), graph_size(Graph, Size0), Size is Size0 + 1, set_size_of_graph(Size, Graph), node_data_score(NodeData, Score0), add_new_node_to_agenda(Graph, Node, Score0, Score), ( Score == Score0 -> true ; set_score_of_node_data(Score, NodeData) ). %% search_graph_add_edge(!Graph, +From, +Pred, +To) is det. %% search_graph_add_edge(!Graph, +From, +Pred, +To, +Options) is det. % % Add a link {From->Pred->To} to the graph. Options include % % * weight(+Weight) % Weight to use for the predicate link instead of the default % % * inverse(true) % is formed from rdf(To,Pred,From) search_graph_add_edge(Graph, From, Pred, To) :- search_graph_add_edge(Graph, From, Pred, To, []). search_graph_add_edge(Graph, From, Pred, To, Options) :- graph_nodes(Graph, Assoc0), rb_lookup(From, FromData, Assoc0), ( option(inverse(true), Options) -> PTerm = i(Pred) ; PTerm = f(Pred) ), add_next_node_data(FromData, p(PTerm, To)), node_data_score(FromData, Score0), ( option(weight(Weight), Options) -> Score is Score0 * Weight ; update_score(Graph, From, Pred, To, Score0, Score) ), ( rb_lookup(To, ToData, Assoc0) -> add_previous_node_data(ToData, p(PTerm, From)), node_data_score(ToData, Score1), join_score(Score1, Score, NewScore), reschedule_node(Graph, To, Score1, NewScore, FinalScore), set_score_of_node_data(FinalScore, ToData) ; search_graph_add_node(Graph, To, [ score(Score), previous([p(PTerm, From)]) ]) ). add_next_node_data(Data, Next) :- node_data_next(Data, Next0), set_next_of_node_data([Next|Next0], Data). add_previous_node_data(Data, Previous) :- node_data_previous(Data, Previous0), set_previous_of_node_data([Previous|Previous0], Data). %% join_score(+Score1, +Score2, -Score) % % Join two scores comming from independent paths. Scores are % considered probabilities. join_score(S1, S2, S) :- S is 1-((1-S1)*(1-S2)). %% update_score(+Graph, +From, +Pred, +To, +Score0, -Score) % % Given that From has score ScoreO, compute the score for To based % on Pred. First prototype used a predefined value per predicate % in the range 0..1. Alternatively we can use statistical % measures. Some porposals: % % * How many subjects of the same type have this property with % this value? % * How many subjects have this object (over this relation)? % % @param Score Float in the range 0.0..1.0 update_score(_Graph, From, Pred, To, Score0, Score) :- by_in_links(From, Pred, To, Factor), Score is Score0 * Factor. by_in_links(_From, Pred, To, Factor) :- rdf_estimate_complexity(_, Pred, To, Complexity), Factor is 1/Complexity. %% search_graph_set_node_type(+Graph, +Node, +Type) is det. % % Set the type for Node in Graph to Type. % % @error existence_error(node, Node) search_graph_set_node_type(Graph, Node, Type) :- graph_nodes(Graph, Assoc), ( rb_lookup(Node, Data, Assoc) -> set_type_of_node_data(Type, Data) ; existence_error(node, Node) ). %% search_graph_node_type(+Graph, +Node, -Type) is det. % % Type is the classification for Node in Graph. % % @tbd Generalise modes? search_graph_node_type(Graph, Node, Type) :- graph_nodes(Graph, Assoc), ( rb_lookup(Node, Data, Assoc) -> node_data_type(Data, Type) ; existence_error(node, Node) ). %% search_graph_node_score(+Graph, +Node, -Weight) is det. % % Weight is the current score for Node in Graph. search_graph_node_score(Graph, Node, Weight) :- graph_nodes(Graph, Assoc), rb_lookup(Node, Data, Assoc), node_data_score(Data, Weight). %% search_graph_size(+Graph, -Size) is det. % % Size of the graph in nodes. search_graph_size(Graph, Size) :- graph_size(Graph, Size). /******************************* * PRUNING * *******************************/ %% search_graph_prune(!Graph, +Nodes) is det. %% search_graph_prune(!Graph, +Nodes, -NewLeaves) is det. % % Delete non-target leave nodes from Graph. search_graph_prune(Graph, Prune) :- search_graph_prune(Graph, Prune, _). search_graph_prune(Graph, Prune, NewLeaves) :- is_list(Prune), !, find_ancestors(Prune, Graph, Ancestors), group_pairs_by_key(Ancestors, Grouped), delete_next_links(Grouped, Graph, NewLeaves). search_graph_prune(Graph, Prune, NewLeaves) :- search_graph_prune(Graph, [Prune], NewLeaves). %% find_ancestors(+Prune, +Graph, -Ancestors) % % @param Ancestors is a difference-list of terms From-p(Pred,To) find_ancestors(Prune, Graph, Ancestors) :- graph_nodes(Graph, Nodes), find_ancestors(Prune, Graph, Nodes, RestNodes, 0, Deleted, Ancestors, []), graph_size(Graph, Size0), Size is Size0 - Deleted, set_size_of_graph(Size, Graph), set_nodes_of_graph(RestNodes, Graph). find_ancestors([], _, Nodes, Nodes, C, C, A, A). find_ancestors([H|T], Graph, Nodes0, Nodes, C0, C, A, AT) :- rb_lookup(H, Data, Nodes0), ( node_data_type(Data, target) -> find_ancestors(T, Graph, Nodes0, Nodes, C0, C, A, AT) ; node_data_next(Data, Next), Next \== [] -> permission_error(Graph, prune, H) ; node_data_previous(Data, Prev), make_a_list(Prev, H, A, AT0), rb_delete(Nodes0, H, Nodes1), C1 is C0 + 1, find_ancestors(T, Graph, Nodes1, Nodes, C1, C, AT0, AT) ). make_a_list([], _, A, A). make_a_list([p(Pred, From)|T0], To, [From-p(Pred,To)|T1], T) :- make_a_list(T0, To, T1, T). delete_next_links([], _, []). delete_next_links([H-DelNext|T], Graph, NewLeaves) :- graph_nodes(Graph, Nodes), rb_lookup(H, Data, Nodes), node_data_next(Data, Next0), subtract_set(Next0, DelNext, Next), set_next_of_node_data(Next, Data), ( Next == [] -> NewLeaves = [H|NewLeaves1], delete_next_links(T, Graph, NewLeaves1) ; delete_next_links(T, Graph, NewLeaves) ). %% subtract_set(+Set, +Subtract, -Remaining) is det. % % Remaining are the elements from Set that are not in Subtract. % None of the sets is ordered. We have to apply heuristics to find % out the best approach. subtract_set(Set, [One], Remaining) :- !, selectchk(One, Set, Remaining). subtract_set(Set, Subtract, Remaining) :- sort(Set, SSet), sort(Subtract, SSubstract), ord_subtract(SSet, SSubstract, Remaining). %% search_graph_nodes_score_list(@Graph, -Scores:list(Node-Score)) is det. % % Scores is a list with Node-Score pairs, with all nodes in % Graph. search_graph_nodes_score_list(Graph, Scores) :- graph_nodes(Graph, Assoc), rb_visit(Assoc, NodeList), scores(NodeList, Scores). scores([], []). scores([Node-Data|T0], [Node-Score|T]) :- node_data_score(Data, Score), !, scores(T0, T). %% search_graph_drains(@Graph, -Nodes) % % List of all drains (end-of-path) search_graph_drains(Graph, Nodes) :- graph_nodes(Graph, Assoc), rb_visit(Assoc, NodeList), drains(NodeList, Nodes). drains([], []). drains([Node-Data|T0], [Node|T]) :- node_data_next(Data, []), !, drains(T0, T). drains([_|T0], T) :- drains(T0, T). %% search_graph_sources(@Graph, -Nodes) % % List of all drains (start-of-path) search_graph_sources(Graph, Nodes) :- graph_nodes(Graph, Assoc), rb_visit(Assoc, NodeList), sources(NodeList, Nodes). sources([], []). sources([Node-Data|T0], [Node|T]) :- node_data_previous(Data, []), !, sources(T0, T). sources([_|T0], T) :- sources(T0, T). /******************************* * AGENDA * *******************************/ %% add_new_node_to_agenda(!Graph, +Node, +Score, -FinalScore) is det. % % Add a new node to the agenda. We want a unique score for each % node, so we can find the node from the score. Therefore, if % there is already a node with this score, we use a slightly lower % score, etc. % % @tbd: use rb_update/5. add_new_node_to_agenda(Graph, Node, Score, FinalScore) :- must_be(between(0.0,1.0), Score), graph_agenda(Graph, RBTree0), into_agenda(RBTree0, Node, Score, RBTree, FinalScore), set_agenda_of_graph(RBTree, Graph). into_agenda(RBTree0, Node, Score, RBTree, FinalScore) :- rb_insert_new(RBTree0, Score, Node, RBTree), !, FinalScore = Score. into_agenda(RBTree0, Node, Score, RBTree, FinalScore) :- rb_previous(RBTree0, Score, PrevScore, _), !, ( Score - PrevScore =< 0.001 -> between(1, infinite, Step), FinalScore is PrevScore - 0.000000001*Step ; between(1, infinite, Step), FinalScore is Score - 0.001*Step ), rb_insert_new(RBTree0, FinalScore, Node, RBTree), !. into_agenda(RBTree0, Node, Score, RBTree, FinalScore) :- FinalScore is Score - 0.001, rb_insert_new(RBTree0, FinalScore, Node, RBTree). %% reschedule_node(!Graph, +Node, +OldScore, -NewScore) % % Change the score of Node in the agenda of Graph. Leaves the % agenda untouched if Node is not in it as this implies the node % is already expanded. reschedule_node(Graph, Node, OldScore, NewScore, FinalScore) :- graph_agenda(Graph, RBTree0), ( rb_delete(OldScore, Node, RBTree0) -> into_agenda(RBTree0, Node, NewScore, RBTree, FinalScore), set_agenda_of_graph(RBTree, Graph) ; FinalScore = NewScore % already expanded ). %% search_graph_next_agenda(!Graph, -Node, -Score) is semidet. % % Fetches the best node from the agenda and deletes it search_graph_next_agenda(Graph, Node, Score) :- graph_agenda(Graph, RBTree0), rb_del_max(RBTree0, Score, Node, RBTree), set_agenda_of_graph(RBTree, Graph). %% search_graph_agenda(+Graph, -Pairs) % % Get agenda from the graph, showing not-expanded nodes as pairs % Score-Node. E.g. % % == % Agenda = [1-a, 0.5-b, 0.25-c] % == search_graph_agenda(Graph, Pairs) :- graph_agenda(Graph, RBTree), rb_visit(RBTree, Pairs0), reverse(Pairs0, Pairs). /******************************* * RDF * *******************************/ %% search_graph_rdf_graph(+Graph, -RDF:list(rdf(S,P,O))) is det. % % Create a list of rdf(S,P,O) triples from Graph. search_graph_rdf_graph(Graph, RDF) :- graph_nodes(Graph, Nodes), rb_visit(Nodes, Pairs), phrase(graph_to_rdf(Pairs), RDF). graph_to_rdf([]) --> []. graph_to_rdf([R-Data|T]) --> { node_data_next(Data, Next) }, node_to_rdf(Next, R), graph_to_rdf(T). node_to_rdf([], _) --> []. node_to_rdf([p(P, O)|T], S) --> link_to_rdf(P, S, O), node_to_rdf(T, S). link_to_rdf(f(P), S, O) --> [ rdf(S, P, O) ]. link_to_rdf(i(P), S, O) --> [ rdf(O, P, S) ]. %% search_graph_rdf(+Graph, ?S, ?P, ?O) is nondet. % % rdf(S,P,O) is an RDF statement in Graph. Provides indexed access % if either S or O are given. :- rdf_meta search_graph_rdf(+, r, r, o), search_graph_rdfs(+, r, r, o). search_graph_rdf(Graph, S, P, O) :- ground(S), ground(O), !, graph_nodes(Graph, Nodes), rb_lookup(S, SData, Nodes), rb_lookup(O, OData, Nodes), ( node_data_next(SData, SN), dmember(P, p(f(P),O), SN) ; node_data_next(OData, PN), dmember(P, p(i(P),S), PN) ). search_graph_rdf(Graph, S, P, O) :- ( search_graph_rdf_(Graph, S, f(P), O) ; search_graph_rdf_(Graph, O, i(P), S) ). search_graph_rdf_(Graph, S, P, O) :- graph_nodes(Graph, Nodes), ( ground(S) -> rb_lookup(S, SData, Nodes), node_data_next(SData, List), member(p(P, O), List) ; ground(O) -> rb_lookup(O, OData, Nodes), node_data_previous(OData, List), member(p(P, S), List) ; rb_in(S, SData, Nodes), node_data_next(SData, List), member(p(P, O), List) ). dmember(Ground, Element, List) :- ground(Ground), !, memberchk(Element, List). dmember(_Ground, Element, List) :- member(Element, List), !. %% search_graph_rdfs(+Graph, ?S, ?P, ?O) is nondet. % % rdf(S,P0,O) is an RDF statement in Graph and P0 is a % subPropertyOf P. Provides indexed access if either S or O are % given. % % @see This is the same as rdf_has/3 from library rdf_db.pl search_graph_rdfs(Graph, S, P, O) :- search_graph_rdf(Graph, S, P0, O), rdf_reachable(P0, rdfs:subPropertyOf, P).