/* This file is part of ClioPatria. Author: HTTP: http://e-culture.multimedian.nl/ GITWEB: http://gollem.science.uva.nl/git/ClioPatria.git GIT: git://gollem.science.uva.nl/home/git/ClioPatria.git GIT: http://gollem.science.uva.nl/home/git/ClioPatria.git Copyright: 2007, E-Culture/MultimediaN ClioPatria is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 2 of the License, or (at your option) any later version. ClioPatria is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with ClioPatria. If not, see . */ :- module(rdf_cluster, [ search_path/3, % +Target, +Graph, -Path search_path/4, % +Method, +Target, +Graph, -Path cached_search_path/6, % +Method, +Target, +Graph, empty_path_cache/1, % -Cache graph_path/5, % +Target, +Graph, +CacheIn -CacheOut, -Path schema_path/2, % +Path, -SchemaPath predicate_schema_path/2, % +Path, -SchemaPath partial_schema_path/2, % +Path, -PartialSchemaPath canonical_path/2, % +Path, -CanonicalPath strip_alignment/2, % +Path, -StrippedPath strip_rdf_value/2, step_weight/4, direct_path/2 ]). :- use_module(library(semweb/rdf_db)). :- use_module(library(semweb/rdfs)). :- use_module(library(assoc)). :- use_module(library(lists)). :- use_module(library(debug)). :- use_module(rdf_graph). :- use_module(rdf_backward_search). %% canonical_path(+PathIn, -PathOut) % % Reduce schema path to a canonical form. canonical_path(PathIn, PathOut) :- strip_rdf_value(PathIn, Path), merge_transitive(Path, Path1), merge_related(Path1, PathOut). %% directh_path(+PathIn, -PathOut) % % Reduce path to a direct path. direct_path([S,P,O], [S,P,O]) :- !. direct_path([S,P,O,Prop,L], [S,P,O,Prop,L]) :- rdfs_subproperty_of(Prop,rdfs:label), !. direct_path(_, other). %% strip_alignment(+IPathIn, -IPathOut) is det. % % Strip alignment relations (owl:sameAs and skos:exactMatch) % from a path. strip_alignment([], []). strip_alignment([One], [One]) :- !. strip_alignment([_O1, P, O2|T0], T) :- alignment_predicate(P), !, strip_alignment([O2|T0], T). strip_alignment([O,P|T0], [O,P|T]) :- strip_alignment(T0, T). term_expansion(alignment_predicate(P0), alignment_predicate(P)) :- rdf_global_id(P0, P). alignment_predicate(owl:sameAs). alignment_predicate(skos:exactMatch). %% merge_transitive(+PathIn, -PathOut) is det. % % Merge transitive relations from a path. merge_transitive([One], [One]). merge_transitive([O1, P, _, P, O3|T0], T) :- transitive_predicate(P), !, merge_transitive([O1,P,O3|T0], T). merge_transitive([O,P|T0], [O,P|T]) :- merge_transitive(T0, T). transitive_predicate(P) :- rdfs_individual_of(P, owl:'TransitiveProperty'), !. %% merge_related(+PathIn, -PathOut) is det. % % Merge related relations from a path. merge_related([One], [One]). merge_related([O,P|T0], [O,P|T]) :- merge_related(T0, T). %% strip_rdf_value(+PathIn, -PathOut) % % Remove rdf:value construction from the Path. strip_rdf_value([One], [One]) :- !. strip_rdf_value([S,P,_,R,V|T], [S,P|Rest]) :- rdf_equal(R, rdf:value), !, strip_rdf_value([V|T], Rest). strip_rdf_value([O,P|T0], [O,P|T]) :- strip_rdf_value(T0, T). %% search_path(+TargetNode, +Graph, -Path) is det. %% search_path(+Method, +TargetNode, +Graph, -Path) is det. % % Path connect TargetNode to a Node with type =start=. Note that % if the search started with a target, the node-type is =target= % and we return an empty Path. % % @param Path is list with alternating property and value, % ended in the start-node (typically a literal). % Path starts with TargetNode. % @param Method one of =best= or =shortest= search_path(Target, Graph, Path) :- search_path(best, Target, Graph, Path). search_path(best, Target, Graph, Path) :- best_first_search(Target, Graph, -, _, Path), !. search_path(breadth, Target, Graph, Path) :- breadth_first_search(Target, Graph, -, _, Path), !. search_path(_, Target, _, [Target]). % search started with target %% cached_search_path(+Method, +TargetNode, +Graph, %% +CacheIn, -CacheOut, -Path) is det. % % Version of search_path/4 that maintains a cache of search % results, to avoid searching the whole graph for each result. The % cache is updated on each result found. Typically it is used in a % loop to obtain the paths for a list of targets. % % @see empty_path_cache/1 to create the initial cache. cached_search_path(best, Target, Graph, CacheIn, CacheOut, Path) :- best_first_search(Target, Graph, CacheIn, CacheOut, Path). cached_search_path(breadth, Target, Graph, CacheIn, CacheOut, Path) :- breadth_first_search(Target, Graph, CacheIn, CacheOut, Path). %cached_search_path(_, Target, _, Cache, Cache, [Target]). %% empty_path_cache(-Cache) % % Produce initial cache for cached_search_path/6. empty_path_cache(Cache) :- empty_assoc(Cache). %% best_first_search(+Target, +Graph, %% +CacheIn, -CacheOut, -Path) is nondet. % % Execute a best-first search through the graph back from the % Target to a start node. % % @param CacheIn is an assoc used to cache previous results. Pass % the atom '-' to disable caching. best_first_search(Target, Graph, CacheIn, CacheOut, Path) :- empty_assoc(Assoc0), put_assoc(Target, Assoc0, true, Done), best_first_search([Target], Graph, CacheIn, CacheOut, Done, Path). best_first_search([AH|_Agenda], _Graph, CacheIn, CacheOut, _Done, Path) :- CacheIn \== (-), agenda_head(AH, R, Path0), get_assoc(R, CacheIn, RestPath), !, debug(path, 'Cache hit ~p --> ~p', [R, RestPath]), reverse(Path0, Path1), append(Path1, RestPath, Path), update_cache(Path, CacheIn, CacheOut). best_first_search([AH|_Agenda], Graph, CacheIn, CacheOut, _Done, Path) :- agenda_head(AH, R, Path0), search_graph_node_type(Graph, R, start), !, reverse(Path0, Path), update_cache(Path, CacheIn, CacheOut). best_first_search([AH|Agenda], Graph, CacheIn, CacheOut, Done, Path) :- agenda_head(AH, R, Path0), put_assoc(R, Done, true, Done2), findall(n(Weight, [V,P|Path0]), ( search_graph_rdf(Graph, R, P, V), \+ get_assoc(V, Done2, _), step_weight(P, V, Graph, Weight) ), Steps0 ), append(Steps0, Agenda, Agenda1), sort(Agenda1, Agenda2), reverse(Agenda2, NewAgenda), best_first_search(NewAgenda, Graph, CacheIn, CacheOut, Done2, Path). update_cache(_, -, -) :- !. % not cached update_cache(Path, CacheIn, CacheOut) :- debug(path, 'Adding path ~p', [Path]), path_into_cache(Path, CacheIn, CacheOut). path_into_cache([], Cache, Cache). path_into_cache(Path, CacheIn, CacheOut) :- Path = [R|RPath], ( get_assoc(R, CacheIn, _) -> CacheOut = CacheIn ; put_assoc(R, CacheIn, RPath, CacheTmp), ( RPath = [_P|Rest] -> path_into_cache(Rest, CacheTmp, CacheOut) ; CacheOut = CacheTmp ) ). %% breadth_first_search(+Target, +Graph, %% +CacheIn, -CacheOut, -Path) is nondet. % % Execute a breath first search through the search graph, finding % the shortest path from a target back to a start node. breadth_first_search(Target, Graph, CacheIn, CacheOut, Path) :- empty_assoc(Assoc0), put_assoc(Target, Assoc0, true, Done), breadth_first_search([Target|Tail], Tail, Graph, CacheIn, CacheOut, Done, Path). breadth_first_search(Agenda, ATail, _, _, _, _, _) :- Agenda == ATail, !, fail. breadth_first_search([AH|_Agenda], _, _, CacheIn, CacheOut, _, Path) :- agenda_head(AH, R, Path0), get_assoc(R, CacheIn, RestPath), !, debug(path, 'Cache hit ~p --> ~p', [R, RestPath]), reverse(Path0, Path1), append(Path1, RestPath, Path), update_cache(Path, CacheIn, CacheOut). breadth_first_search([AH|_Agenda], _, Graph, CacheIn, CacheOut, _, Path) :- agenda_head(AH, R, Path0), search_graph_node_type(Graph, R, start), reverse(Path0, Path), update_cache(Path, CacheIn, CacheOut). breadth_first_search([AH|Agenda], ATail, Graph, CacheIn, CacheOut, Done, Path) :- agenda_head(AH, R, Path0), put_assoc(R, Done, true, Done2), findall(n(W, [V,P2|Path0]), ( search_graph_rdf(Graph, R, P2, V), % FIXME \+ get_assoc(V, Done2, true), step_weight(P2, V, Graph, W) ), Steps0), sort(Steps0, Steps1), reverse(Steps1, Steps), % heighest weight first append(Steps, NewTail, ATail), breadth_first_search(Agenda, NewTail, Graph, CacheIn, CacheOut, Done2, Path). agenda_head(n(_,Path), R, Path) :- !, Path = [R,_P|_]. agenda_head(R, R, [R]). step_weight(P, _V, _Graph, Weight) :- predicate_weight(P, Weight), !. step_weight(_, _V, _Graph, 0.3). %% enumarate_path(+Target, +Graph, %% +CacheIn, -CacheOut, -Path) is nondet. % % Execute a best-first search through the graph back from the % Target to a start node. % % @param CacheIn is an assoc used to cache previous results. Pass % the atom '-' to disable caching. graph_path(Target, Graph, CacheIn, CacheOut, Path) :- empty_assoc(Assoc0), put_assoc(Target, Assoc0, true, Done), graph_path([Target], Graph, CacheIn, CacheOut, Done, Path). graph_path([AH|_Agenda], _Graph, CacheIn, CacheOut, _Done, Path) :- CacheIn \== (-), path_head(AH, R, Path0), get_assoc(R, CacheIn, RestPath), debug(path, 'Cache hit ~p --> ~p', [R, RestPath]), reverse(Path0, Path1), append(Path1, RestPath, Path), update_cache(Path, CacheIn, CacheOut). graph_path([AH|_Agenda], Graph, CacheIn, CacheOut, _Done, Path) :- path_head(AH, R, Path0), search_graph_node_type(Graph, R, start), reverse(Path0, Path), update_cache(Path, CacheIn, CacheOut). graph_path([AH|Agenda], Graph, CacheIn, CacheOut, Done, Path) :- path_head(AH, R, Path0), put_assoc(R, Done, true, Done2), findall([V,P|Path0], ( search_graph_rdf(Graph, R, P, V), \+ get_assoc(V, Done2, _) ), Steps0 ), Steps0 \== [], append(Steps0, Agenda, Agenda1), sort(Agenda1, Agenda2), reverse(Agenda2, NewAgenda), graph_path(NewAgenda, Graph, CacheIn, CacheOut, Done2, Path). path_head([R|Path], R, [R|Path]) :- !. path_head(R, R, [R]). %% schema_path(+InstancePath, -SchemaPath) is det. % % Abstract a path by lifting all resources to their class/concept % and all predicates to their interface properties. % % @param InstancePath is a list of alternating nodes and edges % (predicates) that starts with the origin of the search % and end with the target (e.g. artwork). % @param SchemaPath is a list of the same length as InstancePath % holding the abstracted path. % @see Keep in sync with kwd_search:predicate_path/2 from old % basic search. schema_path([], []). schema_path([V], [Class]) :- !, ( V = literal(_) -> rdf_equal(Class, rdfs:'Literal') ; Class = V % keep the URI ). predicate_schema_path([], []). predicate_schema_path([V], [V]). %% partial_schema_path(+InstancePath, -SchemaPath) is det. % % Abstract a path by lifting those resources for which an % abstraction is defined. partial_schema_path([], []). partial_schema_path([R|T], [Abstract|Rest]) :- ( R = literal(_) -> rdf_equal(Abstract, rdfs:'Literal') ; catch(cliopatria:abstract_class(R, Abstract),_,fail) -> true ; Abstract = R ), partial_schema_path(T, Rest). cliopatria:abstract_class(A,A).