/* This file is part of ClioPatria.
Author:
HTTP: http://e-culture.multimedian.nl/
GITWEB: http://gollem.science.uva.nl/git/ClioPatria.git
GIT: git://gollem.science.uva.nl/home/git/ClioPatria.git
GIT: http://gollem.science.uva.nl/home/git/ClioPatria.git
Copyright: 2007, E-Culture/MultimediaN
ClioPatria is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
ClioPatria is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with ClioPatria. If not, see .
*/
:- module(rdf_search,
[ rdf_keyword_search/4, % +KeyWord, +TargetCond, -State, +Options
rdf_init_state/3, % +TargetCond, -State, +Options
rdf_start_search/2, % +Query, -State
rdf_extend_search/1, % !State
rdf_prune_search/1, % !State
rdf_prune_search/2, % !State, +Options
rdf_search_property/2, % +Graph, ?Property
rdf_inverse_property/2
]).
:- use_module(library(semweb/rdf_db)).
:- use_module(library(semweb/rdfs)).
:- use_module(library(record)).
:- use_module(library(count)).
:- use_module(library(assoc)).
:- use_module(library(pairs)).
:- use_module(library(lists)).
:- use_module(library(debug)).
:- use_module(library(option)).
:- use_module(rdf_graph).
:- use_module(kwd_search).
/** Search RDF graph
@tbd Proper location for find_literals/3
@author Jan Wielemaker
*/
:- meta_predicate
rdf_keyword_search(+, 1, -, +),
rdf_init_state(1, -, +).
:- record
state(graph, % Current search graph
start,
targets, % Collected hits
target_count = 0, % # Targets found
target_condition, % Test resource as a target
threshold = 0.05, % Graph search threshold
literal_threshold = 0.05, % Literal matching threshold
literal_score = true, % Use literal score in graph search
edge_limit = 0, % limit edges per node
target_expand = true, % expand targets
expand_node). % Generate new edges
%% rdf_keyword_search(+Keyword, :TargetCond, -State, +Options)
%
% Initiate a search-graph from a keyword by adding all matching
% literals to the initial state of the graph. Options:
%
% * expand_node(:Goal)
% Called as Goal(+Object, -Link) to create new edges.
%
% * threshold(+Number)
% Nodes with a weight lower then Threshold are not further explore
%
% * literal_threshold(+Number)
% Literals with a minimum edit distance above the literal threshold
% are added to the search graph.
%
% * edge_limit(+Number)
% Per Node a maximum of edges are added to the graph (0 is unbound).
%
% * expand_target(+Boolean)
% When true target nodes are alse expanded
rdf_keyword_search(Keyword, TargetCond, State, Options) :-
rdf_init_state(TargetCond, State, Options),
rdf_start_search(Keyword, State).
%% rdf_init_state(:TargetCond, -State, +Options)
%
% Initiate a search-graph state.
% Options see rdf_keyword_search/4
rdf_init_state(TargetCond, State, Options) :-
new_search_graph(Graph),
empty_assoc(Start),
empty_assoc(Targets),
strip_module(TargetCond, M, TC),
make_state([ graph(Graph),
start(Start),
targets(Targets),
target_condition(M:TC)
], State0),
meta_options(rdf_search:is_meta, Options, MetaOptions),
set_state_fields(MetaOptions, State0, State, _RestOptions).
%% rdf_start_search(+Query, -State)
%
% Starts the search based on a query consisting of a:
%
% * List of Resources
% * Resource
% * String
rdf_start_search(Query, State) :-
( is_list(Query)
-> Resources = Query
; is_resource(Query)
-> Resources = [Query]
), !,
debug(query, 'rdf_start_search Resource to query for ~p', [Resources]),
state_start(State, Start0),
state_graph(State, Graph),
add_resources(Resources, Start0, Graph, Start, Links),
set_start_of_state(Start, State),
add_hits(Links, State).
rdf_start_search(Query, State) :-
state_literal_threshold(State, Threshold),
debug(graph_search_algorithm, 'Step 1: Find Literals based on query: ~p with treshold: ~p', [Query, threshold(Threshold)]),
find_literals(Query, Literals, [threshold(Threshold)]),
length(Literals, Length),
debug(graph_search_algorithm, 'Result Step 1: ~p resulting literals with scores: ~p', [Length, Literals]),
state_graph(State, Graph),
state_start(State, Start0),
( state_literal_score(State, false)
-> add_literals_no_score(Literals, Start0, Graph, Start)
; add_literals(Literals, Start0, Graph, Start)
),
set_start_of_state(Start, State).
is_meta(expand_node).
is_resource(literal(_)) :- !.
is_resource(R) :- rdf(R,_,_),!.
is_resource(R) :- rdf(_,R,_),!.
is_resource(R) :- rdf(_,_,R),!.
%% rdf_extend_search(!State) is semidet.
%
% Expand the currently best node from the agenda. Fails if the
% agenda is empty. If the next node has no followers, it silently
% continues to the next node on the agenda.
rdf_extend_search(State) :-
state_graph(State, Graph),
state_expand_node(State, ExpandNode),
search_graph_next_agenda(Graph, Expand, Score),
state_threshold(State, Threshold),
state_edge_limit(State, EdgeLimit),
state_target_expand(State, TargetExpand),
( TargetExpand == false,
search_graph_node_type(Graph, Expand, target)
-> debug(rdf_search, 'Stop expanding target ~p (score ~2f)',
[Expand, Score])
; Score >= Threshold
-> ( EdgeLimit > 0
-> answer_set(L, edge(ExpandNode, Expand, Score, L), EdgeLimit, Links)
; findall(L, edge(ExpandNode, Expand, Score, L), Links)
),
length(Links, Len),
debug(rdf_search, 'Expanding ~p (score ~2f, ~D successors)',
[Expand, Score, Len]),
( Links == []
-> rdf_extend_search(State)
; add_edges(Links, Graph, Expand, State),
add_hits(Links, State)
)
; debug(rdf_search, 'Stopped expanding at ~p (score ~2f)',
[Expand, Score]),
fail
).
%% edge(:Expand, +Object, +Score, -Link) is nondet.
%
% Generate edges, expanding Object. Returned Link is one of
%
% * i(S,P,W)
% Represents rdf(S,P,Object), costing weight W.
%
% * f(S,P,W)
% Represents rdf(Object,P,S), costing weight W.
%
% In these results, W is the cost traveling over the link. It is
% a float between 0.0 (inifinite cost) and 1.0 (no cost).
edge(Expand, From, _, Link) :-
var(Expand), !,
edge(From, Link).
edge(Expand, From, Score, Link) :-
call(Expand, From, Score, Link).
%edge(Expand, From, _, Link) :-
%call(Expand, From, Link).
%% edge(+Object, -Link) is nondet.
%
% Default predicate to generate edges.
%
% @see ClioPatria's expansion is in graph_search:edge/2.
edge(O, i(S,P,W)) :-
weighted_edge(O, S, P, W),
debug(myedge, 'Expanding ~2f ~p ~p ~p~n', [W, O, P, S]),
W > 0.0001.
weighted_edge(O, S, P, W) :-
setof(S, edge_i(O, S, P), Ss),
( predicate_weight(P, W)
-> member(S, Ss)
; length(Ss, Len),
member(S, Ss),
subject_weight(S, Len, W)
).
edge_i(O, S, P) :-
rdf(S, P, O).
edge_i(O, S, P) :-
rdf(O, P0, S),
rdf_has(P0, owl:inverseOf, P).
predicate_weight(P, 1) :-
rdfs_subproperty_of(P, rdfs:label).
predicate_weight(P, 1) :-
rdf_equal(P, owl:sameAs).
subject_weight(S, _, 1) :-
rdf_is_bnode(S), !.
subject_weight(_, Count, W) :-
W is 1/max(3, Count).
%% add_literals(+Lits:list(Score-Literal), +Graph) is det.
%
% Adds the starting points for the search.
add_literals([], Assoc, _, Assoc).
add_literals([Score-Lit|T], Assoc0, Graph, Assoc) :-
put_assoc(literal(Lit), Assoc0, found, Assoc1),
search_graph_add_node(Graph, literal(Lit),
[ score(Score),
type(start)
]),
add_literals(T, Assoc1, Graph, Assoc).
add_literals_no_score([], Assoc, _, Assoc).
add_literals_no_score([_S-Lit|T], Assoc0, Graph, Assoc) :-
put_assoc(literal(Lit), Assoc0, found, Assoc1),
search_graph_add_node(Graph, literal(Lit),
[ score(1),
type(start)
]),
add_literals_no_score(T, Assoc1, Graph, Assoc).
add_resources([], Assoc, _, Assoc, []).
add_resources([R|Rs], Assoc0, Graph, Assoc, [f(R)|Ls]) :-
put_assoc(R, Assoc0, found, Assoc1),
search_graph_add_node(Graph, R,
[ score(1),
type(start)
]),
add_resources(Rs, Assoc1, Graph, Assoc, Ls).
%% add_edges(+Links, +Graph, +O, +State) is det.
%
% Links is a list of i(S,P,W) or f(S,P,W). If the graph already
% contains an edge that is considered a reverse property of the
% new edge, we do not add the new edge.
add_edges([], _, _, _).
add_edges([H|T], Graph, Expand, State) :-
add_edge(H, Expand, State),
add_edges(T, Graph, Expand, State).
add_edge(Link, O, State) :-
state_graph(State, Graph),
options_from_link(Link, S, P, Options, IF),
( rdf_inverse_property(P, IP),
( IF == i % Adding inverse property
-> search_graph_rdf(Graph, O, IP, S)
; search_graph_rdf(Graph, S, IP, O)
),
debug(rdf_search(inverse), 'Found ~p ~p ~p', [S, IP, O])
-> true
; search_graph_add_edge(Graph, O, P, S, Options)
).
options_from_link(i(S,P,W), S, P, [ weight(W), inverse(true) ], i).
options_from_link(f(S,P,W), S, P, [ weight(W) ], f).
%% rdf_inverse_property(+P1, ?P2) is nondet.
%
% True if P1 and P2 are each others inverses.
rdf_inverse_property(P1, P2) :-
rdf_has(P1, owl:inverseOf, P2).
rdf_inverse_property(P1, P2) :-
rdf_has(P2, owl:inverseOf, P1).
rdf_inverse_property(P, P) :-
rdf(P, rdf:type, owl:'SymmetricProperty').
%% add_hits(+Links, +State) is det.
%
% Hits that satisfy target_condition are added to the target list.
add_hits([], _) :- !.
add_hits(Links, State) :-
state_target_condition(State, Cond),
findall(S, ( member(H,Links),
arg(1, H, S),
call(Cond, S)
), Hits0),
sort(Hits0, Hits),
( Hits = []
-> true
; state_targets(State, Assoc0),
state_graph(State, Graph),
state_target_count(State, C0),
add_hits_to_target(Hits, Assoc0, C0, Graph, Assoc, C),
set_target_count_of_state(C, State),
set_targets_of_state(Assoc, State)
).
add_hits_to_target([], Assoc, C, _, Assoc, C).
add_hits_to_target([Hit|Hits], Assoc0, C0, Graph, Assoc, C) :-
get_assoc(Hit, Assoc0, _), !,
add_hits_to_target(Hits, Assoc0, C0, Graph, Assoc, C).
add_hits_to_target([Hit|Hits], Assoc0, C0, Graph, Assoc, C) :-
C1 is C0 + 1,
put_assoc(Hit, Assoc0, found, Assoc1), % Other value?
( search_graph_node_type(Graph, Hit, start)
-> true
; search_graph_set_node_type(Graph, Hit, target)
),
add_hits_to_target(Hits, Assoc1, C1, Graph, Assoc, C).
%% rdf_prune_search(!Graph) is det.
%
% Prune all dead-ends from the search graph.
rdf_prune_search(State) :-
rdf_prune_search(State, []).
rdf_prune_search(State, Options) :-
state_targets(State, Assoc),
assoc_to_list(Assoc, Pairs),
pairs_keys_values(Pairs, TNodes, _),
state_graph(State, Graph),
search_graph_drains(Graph, Drains),
sort(TNodes, TNodeSet),
sort(Drains, DrainSet),
ord_subtract(DrainSet, TNodeSet, DeadEnds),
( memberchk(recursive, Options)
-> full_prune(Graph, DeadEnds)
; search_graph_prune(Graph, DeadEnds, _)
).
full_prune(_Graph, []) :- !.
full_prune(Graph, DeadEnds) :-
search_graph_prune(Graph, DeadEnds, NewLeaves),
full_prune(Graph, NewLeaves).
%% rdf_search_property(+Graph, ?Prop).
%
% Extract features from the search. Defined properties are:
%
% * target_count(-Count)
% Number of (unique) targets found.
%
% * targets(-Targets:list(Score-Target)
% List of targets found sorted by Score (highest score first)
% There are no duplicate targets in the list.
%
% * graph_size(-Count)
% Number of nodes in the search-graph
%
% * sources(-List)
% List of source-nodes (start of paths).
%
% * drains(-List)
% List if drain-nodes (end of paths).
%
% * rdf(-RDF:list(rdf(S,P,O)))
% Extract the current graph as a set of RDF triples.
rdf_search_property(Graph, Prop) :-
debug(rdf_search, "Properties: ~p", [Prop]),
search_property(Prop, Graph).
search_property(graph(Graph), State) :-
state_graph(State, Graph).
search_property(state_start(Assoc), State) :-
state_start(State, Assoc).
search_property(start(Pairs), State) :-
state_start(State, Assoc),
state_graph(State, Graph),
assoc_to_list(Assoc, Nodes),
scored_nodes(Nodes, Graph, Scored),
keysort(Scored, Sorted),
reverse(Sorted, Pairs).
search_property(target_count(C), State) :-
state_target_count(State, C).
search_property(state_targets(Assoc), State) :-
state_targets(State, Assoc).
search_property(targets(L), State) :-
state_targets(State, Assoc),
state_graph(State, Graph),
assoc_to_list(Assoc, Pairs),
scored_nodes(Pairs, Graph, Scored),
keysort(Scored, Sorted),
reverse(Sorted, L).
search_property(graph_size(C), State) :-
state_graph(State, Graph),
search_graph_size(Graph, C).
search_property(agenda(Agenda), State) :-
state_graph(State, Graph),
search_graph_agenda(Graph, Agenda).
search_property(drains(Drains), State) :-
state_graph(State, Graph),
search_graph_drains(Graph, Drains).
search_property(sources(Sources), State) :-
state_graph(State, Graph),
search_graph_sources(Graph, Sources).
search_property(node_score_list(Scores), State) :-
state_graph(State, Graph),
search_graph_nodes_score_list(Graph, Scores).
search_property(rdf(Triples), State) :-
state_graph(State, Graph),
search_graph_rdf_graph(Graph, Triples).
scored_nodes([], _, []) :- !.
scored_nodes([H-_|T0], Graph, [S-H|T]) :-
search_graph_node_score(Graph, H, S),!,
scored_nodes(T0, Graph, T).
scored_nodes([_|T0], Graph, T) :-
scored_nodes(T0, Graph, T).