/* This file is part of ClioPatria. Author: HTTP: http://e-culture.multimedian.nl/ GITWEB: http://gollem.science.uva.nl/git/ClioPatria.git GIT: git://gollem.science.uva.nl/home/git/ClioPatria.git GIT: http://gollem.science.uva.nl/home/git/ClioPatria.git Copyright: 2007, E-Culture/MultimediaN ClioPatria is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 2 of the License, or (at your option) any later version. ClioPatria is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with ClioPatria. If not, see . */ :- module(rdf_search, [ rdf_keyword_search/4, % +KeyWord, +TargetCond, -State, +Options rdf_init_state/3, % +TargetCond, -State, +Options rdf_start_search/2, % +Query, -State rdf_extend_search/1, % !State rdf_prune_search/1, % !State rdf_prune_search/2, % !State, +Options rdf_search_property/2, % +Graph, ?Property rdf_inverse_property/2 ]). :- use_module(library(semweb/rdf_db)). :- use_module(library(semweb/rdfs)). :- use_module(library(record)). :- use_module(library(count)). :- use_module(library(assoc)). :- use_module(library(pairs)). :- use_module(library(lists)). :- use_module(library(debug)). :- use_module(library(option)). :- use_module(rdf_graph). :- use_module(kwd_search). /** Search RDF graph @tbd Proper location for find_literals/3 @author Jan Wielemaker */ :- meta_predicate rdf_keyword_search(+, 1, -, +), rdf_init_state(1, -, +). :- record state(graph, % Current search graph start, targets, % Collected hits target_count = 0, % # Targets found target_condition, % Test resource as a target threshold = 0.05, % Graph search threshold literal_threshold = 0.05, % Literal matching threshold literal_score = true, % Use literal score in graph search edge_limit = 0, % limit edges per node target_expand = true, % expand targets expand_node). % Generate new edges %% rdf_keyword_search(+Keyword, :TargetCond, -State, +Options) % % Initiate a search-graph from a keyword by adding all matching % literals to the initial state of the graph. Options: % % * expand_node(:Goal) % Called as Goal(+Object, -Link) to create new edges. % % * threshold(+Number) % Nodes with a weight lower then Threshold are not further explore % % * literal_threshold(+Number) % Literals with a minimum edit distance above the literal threshold % are added to the search graph. % % * edge_limit(+Number) % Per Node a maximum of edges are added to the graph (0 is unbound). % % * expand_target(+Boolean) % When true target nodes are alse expanded rdf_keyword_search(Keyword, TargetCond, State, Options) :- rdf_init_state(TargetCond, State, Options), rdf_start_search(Keyword, State). %% rdf_init_state(:TargetCond, -State, +Options) % % Initiate a search-graph state. % Options see rdf_keyword_search/4 rdf_init_state(TargetCond, State, Options) :- new_search_graph(Graph), empty_assoc(Start), empty_assoc(Targets), strip_module(TargetCond, M, TC), make_state([ graph(Graph), start(Start), targets(Targets), target_condition(M:TC) ], State0), meta_options(rdf_search:is_meta, Options, MetaOptions), set_state_fields(MetaOptions, State0, State, _RestOptions). %% rdf_start_search(+Query, -State) % % Starts the search based on a query consisting of a: % % * List of Resources % * Resource % * String rdf_start_search(Query, State) :- ( is_list(Query) -> Resources = Query ; is_resource(Query) -> Resources = [Query] ), !, debug(query, 'rdf_start_search Resource to query for ~p', [Resources]), state_start(State, Start0), state_graph(State, Graph), add_resources(Resources, Start0, Graph, Start, Links), set_start_of_state(Start, State), add_hits(Links, State). rdf_start_search(Query, State) :- state_literal_threshold(State, Threshold), debug(graph_search_algorithm, 'Step 1: Find Literals based on query: ~p with treshold: ~p', [Query, threshold(Threshold)]), find_literals(Query, Literals, [threshold(Threshold)]), length(Literals, Length), debug(graph_search_algorithm, 'Result Step 1: ~p resulting literals with scores: ~p', [Length, Literals]), state_graph(State, Graph), state_start(State, Start0), ( state_literal_score(State, false) -> add_literals_no_score(Literals, Start0, Graph, Start) ; add_literals(Literals, Start0, Graph, Start) ), set_start_of_state(Start, State). is_meta(expand_node). is_resource(literal(_)) :- !. is_resource(R) :- rdf(R,_,_),!. is_resource(R) :- rdf(_,R,_),!. is_resource(R) :- rdf(_,_,R),!. %% rdf_extend_search(!State) is semidet. % % Expand the currently best node from the agenda. Fails if the % agenda is empty. If the next node has no followers, it silently % continues to the next node on the agenda. rdf_extend_search(State) :- state_graph(State, Graph), state_expand_node(State, ExpandNode), search_graph_next_agenda(Graph, Expand, Score), state_threshold(State, Threshold), state_edge_limit(State, EdgeLimit), state_target_expand(State, TargetExpand), ( TargetExpand == false, search_graph_node_type(Graph, Expand, target) -> debug(rdf_search, 'Stop expanding target ~p (score ~2f)', [Expand, Score]) ; Score >= Threshold -> ( EdgeLimit > 0 -> answer_set(L, edge(ExpandNode, Expand, Score, L), EdgeLimit, Links) ; findall(L, edge(ExpandNode, Expand, Score, L), Links) ), length(Links, Len), debug(rdf_search, 'Expanding ~p (score ~2f, ~D successors)', [Expand, Score, Len]), ( Links == [] -> rdf_extend_search(State) ; add_edges(Links, Graph, Expand, State), add_hits(Links, State) ) ; debug(rdf_search, 'Stopped expanding at ~p (score ~2f)', [Expand, Score]), fail ). %% edge(:Expand, +Object, +Score, -Link) is nondet. % % Generate edges, expanding Object. Returned Link is one of % % * i(S,P,W) % Represents rdf(S,P,Object), costing weight W. % % * f(S,P,W) % Represents rdf(Object,P,S), costing weight W. % % In these results, W is the cost traveling over the link. It is % a float between 0.0 (inifinite cost) and 1.0 (no cost). edge(Expand, From, _, Link) :- var(Expand), !, edge(From, Link). edge(Expand, From, Score, Link) :- call(Expand, From, Score, Link). %edge(Expand, From, _, Link) :- %call(Expand, From, Link). %% edge(+Object, -Link) is nondet. % % Default predicate to generate edges. % % @see ClioPatria's expansion is in graph_search:edge/2. edge(O, i(S,P,W)) :- weighted_edge(O, S, P, W), debug(myedge, 'Expanding ~2f ~p ~p ~p~n', [W, O, P, S]), W > 0.0001. weighted_edge(O, S, P, W) :- setof(S, edge_i(O, S, P), Ss), ( predicate_weight(P, W) -> member(S, Ss) ; length(Ss, Len), member(S, Ss), subject_weight(S, Len, W) ). edge_i(O, S, P) :- rdf(S, P, O). edge_i(O, S, P) :- rdf(O, P0, S), rdf_has(P0, owl:inverseOf, P). predicate_weight(P, 1) :- rdfs_subproperty_of(P, rdfs:label). predicate_weight(P, 1) :- rdf_equal(P, owl:sameAs). subject_weight(S, _, 1) :- rdf_is_bnode(S), !. subject_weight(_, Count, W) :- W is 1/max(3, Count). %% add_literals(+Lits:list(Score-Literal), +Graph) is det. % % Adds the starting points for the search. add_literals([], Assoc, _, Assoc). add_literals([Score-Lit|T], Assoc0, Graph, Assoc) :- put_assoc(literal(Lit), Assoc0, found, Assoc1), search_graph_add_node(Graph, literal(Lit), [ score(Score), type(start) ]), add_literals(T, Assoc1, Graph, Assoc). add_literals_no_score([], Assoc, _, Assoc). add_literals_no_score([_S-Lit|T], Assoc0, Graph, Assoc) :- put_assoc(literal(Lit), Assoc0, found, Assoc1), search_graph_add_node(Graph, literal(Lit), [ score(1), type(start) ]), add_literals_no_score(T, Assoc1, Graph, Assoc). add_resources([], Assoc, _, Assoc, []). add_resources([R|Rs], Assoc0, Graph, Assoc, [f(R)|Ls]) :- put_assoc(R, Assoc0, found, Assoc1), search_graph_add_node(Graph, R, [ score(1), type(start) ]), add_resources(Rs, Assoc1, Graph, Assoc, Ls). %% add_edges(+Links, +Graph, +O, +State) is det. % % Links is a list of i(S,P,W) or f(S,P,W). If the graph already % contains an edge that is considered a reverse property of the % new edge, we do not add the new edge. add_edges([], _, _, _). add_edges([H|T], Graph, Expand, State) :- add_edge(H, Expand, State), add_edges(T, Graph, Expand, State). add_edge(Link, O, State) :- state_graph(State, Graph), options_from_link(Link, S, P, Options, IF), ( rdf_inverse_property(P, IP), ( IF == i % Adding inverse property -> search_graph_rdf(Graph, O, IP, S) ; search_graph_rdf(Graph, S, IP, O) ), debug(rdf_search(inverse), 'Found ~p ~p ~p', [S, IP, O]) -> true ; search_graph_add_edge(Graph, O, P, S, Options) ). options_from_link(i(S,P,W), S, P, [ weight(W), inverse(true) ], i). options_from_link(f(S,P,W), S, P, [ weight(W) ], f). %% rdf_inverse_property(+P1, ?P2) is nondet. % % True if P1 and P2 are each others inverses. rdf_inverse_property(P1, P2) :- rdf_has(P1, owl:inverseOf, P2). rdf_inverse_property(P1, P2) :- rdf_has(P2, owl:inverseOf, P1). rdf_inverse_property(P, P) :- rdf(P, rdf:type, owl:'SymmetricProperty'). %% add_hits(+Links, +State) is det. % % Hits that satisfy target_condition are added to the target list. add_hits([], _) :- !. add_hits(Links, State) :- state_target_condition(State, Cond), findall(S, ( member(H,Links), arg(1, H, S), call(Cond, S) ), Hits0), sort(Hits0, Hits), ( Hits = [] -> true ; state_targets(State, Assoc0), state_graph(State, Graph), state_target_count(State, C0), add_hits_to_target(Hits, Assoc0, C0, Graph, Assoc, C), set_target_count_of_state(C, State), set_targets_of_state(Assoc, State) ). add_hits_to_target([], Assoc, C, _, Assoc, C). add_hits_to_target([Hit|Hits], Assoc0, C0, Graph, Assoc, C) :- get_assoc(Hit, Assoc0, _), !, add_hits_to_target(Hits, Assoc0, C0, Graph, Assoc, C). add_hits_to_target([Hit|Hits], Assoc0, C0, Graph, Assoc, C) :- C1 is C0 + 1, put_assoc(Hit, Assoc0, found, Assoc1), % Other value? ( search_graph_node_type(Graph, Hit, start) -> true ; search_graph_set_node_type(Graph, Hit, target) ), add_hits_to_target(Hits, Assoc1, C1, Graph, Assoc, C). %% rdf_prune_search(!Graph) is det. % % Prune all dead-ends from the search graph. rdf_prune_search(State) :- rdf_prune_search(State, []). rdf_prune_search(State, Options) :- state_targets(State, Assoc), assoc_to_list(Assoc, Pairs), pairs_keys_values(Pairs, TNodes, _), state_graph(State, Graph), search_graph_drains(Graph, Drains), sort(TNodes, TNodeSet), sort(Drains, DrainSet), ord_subtract(DrainSet, TNodeSet, DeadEnds), ( memberchk(recursive, Options) -> full_prune(Graph, DeadEnds) ; search_graph_prune(Graph, DeadEnds, _) ). full_prune(_Graph, []) :- !. full_prune(Graph, DeadEnds) :- search_graph_prune(Graph, DeadEnds, NewLeaves), full_prune(Graph, NewLeaves). %% rdf_search_property(+Graph, ?Prop). % % Extract features from the search. Defined properties are: % % * target_count(-Count) % Number of (unique) targets found. % % * targets(-Targets:list(Score-Target) % List of targets found sorted by Score (highest score first) % There are no duplicate targets in the list. % % * graph_size(-Count) % Number of nodes in the search-graph % % * sources(-List) % List of source-nodes (start of paths). % % * drains(-List) % List if drain-nodes (end of paths). % % * rdf(-RDF:list(rdf(S,P,O))) % Extract the current graph as a set of RDF triples. rdf_search_property(Graph, Prop) :- debug(rdf_search, "Properties: ~p", [Prop]), search_property(Prop, Graph). search_property(graph(Graph), State) :- state_graph(State, Graph). search_property(state_start(Assoc), State) :- state_start(State, Assoc). search_property(start(Pairs), State) :- state_start(State, Assoc), state_graph(State, Graph), assoc_to_list(Assoc, Nodes), scored_nodes(Nodes, Graph, Scored), keysort(Scored, Sorted), reverse(Sorted, Pairs). search_property(target_count(C), State) :- state_target_count(State, C). search_property(state_targets(Assoc), State) :- state_targets(State, Assoc). search_property(targets(L), State) :- state_targets(State, Assoc), state_graph(State, Graph), assoc_to_list(Assoc, Pairs), scored_nodes(Pairs, Graph, Scored), keysort(Scored, Sorted), reverse(Sorted, L). search_property(graph_size(C), State) :- state_graph(State, Graph), search_graph_size(Graph, C). search_property(agenda(Agenda), State) :- state_graph(State, Graph), search_graph_agenda(Graph, Agenda). search_property(drains(Drains), State) :- state_graph(State, Graph), search_graph_drains(Graph, Drains). search_property(sources(Sources), State) :- state_graph(State, Graph), search_graph_sources(Graph, Sources). search_property(node_score_list(Scores), State) :- state_graph(State, Graph), search_graph_nodes_score_list(Graph, Scores). search_property(rdf(Triples), State) :- state_graph(State, Graph), search_graph_rdf_graph(Graph, Triples). scored_nodes([], _, []) :- !. scored_nodes([H-_|T0], Graph, [S-H|T]) :- search_graph_node_score(Graph, H, S),!, scored_nodes(T0, Graph, T). scored_nodes([_|T0], Graph, T) :- scored_nodes(T0, Graph, T).