cluster_search/commit
more or less working version of the cluster http api
author | Jacco van Ossenbruggen |
---|---|
Sat Apr 27 15:53:53 2013 +0200 | |
committer | Jacco van Ossenbruggen |
Sat Apr 27 15:53:53 2013 +0200 | |
commit | c1d44617bbc879953fe8fcd0934ea5cc079a07e3 |
tree | b131102766a61b2c794995fea30fc8f6f07cdb61 |
parent | 50a06c7fd99b96958b37e728611f8618f5a8e7c5 |
Diff style: patch stat
diff --git a/api/cluster_search.pl b/api/cluster_search.pl new file mode 100644 index 0000000..8b7553d --- /dev/null +++ b/api/cluster_search.pl @@ -0,0 +1,776 @@ +/* This file is part of ClioPatria. + + Author: Michiel Hildebrand + HTTP: http://e-culture.multimedian.nl/ + GITWEB: http://gollem.science.uva.nl/git/ClioPatria.git + GIT: git://gollem.science.uva.nl/home/git/ClioPatria.git + GIT: http://gollem.science.uva.nl/home/git/ClioPatria.git + Copyright: 2007, E-Culture/MultimediaN + + ClioPatria is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + ClioPatria is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with ClioPatria. If not, see <http://www.gnu.org/licenses/>. +*/ + +:- module(search_api, []). + + +/*************************************************** +* load modules +***************************************************/ + +% http library modules +:- use_module(library('http/http_dispatch')). +:- use_module(library('http/http_parameters')). +:- use_module(library('http/http_path')). + +:- use_module(library('http/json')). +:- use_module(library('http/json_convert')). +:- use_module(library('http/http_json')). + +:- use_module(library(settings)). +:- use_module(library(lists)). +:- use_module(library(count)). + +% semweb libraries +:- use_module(library(semweb/rdf_db)). +:- use_module(library(semweb/rdfs)). +:- use_module(library(semweb/rdf_label)). + + +% util modules +:- use_module(library(cluster_search/rdf_cluster)). +:- use_module(library(cluster_search/rdf_search)). +:- use_module(library(cluster_search/rdf_graph)). +:- use_module(library(cluster_search/filter)). +:- use_module(library(cluster_search/rdfs_plus_skos)). +:- use_module(library(cluster_search/iface_util)). +:- use_module(library(cluster_search/graph_util)). +:- use_module(library(cluster_search/tree)). +:- use_module(library(cluster_search/tree_abstract)). +:- use_module(library(cluster_search/json_graph)). +:- use_module(library(cluster_search/parameters)). + + +% search modules +:- use_module(library(cluster_search/graph_search)). + + + +/*************************************************** +* http handlers +***************************************************/ + +:- http_handler(api(search), + search_api, + [ spawn(search), content_type(application/json) ]). +a + + +/*************************************************** +* Settings +***************************************************/ + +:- setting(search:basic_search_target, uri, rdfs:'Resource', + 'Default Target for search'). +:- setting(search:threshold, between(0.0,1.0), 0.05, + 'Graph-search threshold'). +:- setting(search:literal_threshold, between(0.0,1.0), 0.05, + 'Literal-search threshold'). +:- setting(search:literal_score, boolean, true, + 'Use score of string match in threshold of graph search'). +:- setting(search:steps, nonneg, 1000, + 'Maximum number of steps in the graph search (0 is unbound)'). +:- setting(search:edge_limit, nonneg, 0, + 'Limit extension of Node with a maximum number of edges \c + (0 is unbound)'). +:- setting(search:max, nonneg, 100, + 'Maximum number of results shown in the output'). +:- setting(search:cluster, oneof([concept, path, spath, role]), spath, + 'Create clusters by graph path or schema path'). +:- setting(search:search_path, oneof([best, breadth]), best, + 'Create clusters on best or shortest path'). +:- setting(search:sort, oneof([score, path_length, false]), score, + 'Sort the results by'). +:- setting(search:prune, boolean, true, + 'Prune the search Graph'). +:- setting(search:search_type, oneof([literal,concept,backward]), backward, + 'Method to traverse the graph'). + +/*************************************************** +* api parameters +***************************************************/ + +%% search_option(?Name, ?Type, ?Default, ?Description) is nondet. +% +% Parameters to control the graph exploration + +search_option(steps, nonneg, Steps, + 'The number of steps the graph is expanded (0 is unbound)') :- + setting(search:steps, Steps). +search_option(threshold, between(0.0,1.0), Threshold, + 'Minimum resource weight at which the \c + graph expansion is cut of') :- + setting(search:threshold, Threshold). +search_option(literal_threshold, between(0.0,1.0), Threshold, + 'Miminum distance between a keyword and the RDF literals') :- + setting(search:literal_threshold, Threshold). +search_option(literal_score, boolean, Boolean, + 'Use weight of string match in graph search') :- + setting(search:literal_score, Boolean). +search_option(edge_limit, nonneg, Limit, + 'Maximum number of edges expand per node in the \c + graph expansion (0 is unbound)') :- + setting(search:edge_limit, Limit). +search_option(rdfs_plus_skos, + list(oneof([ owl_sameas, owl_inverse, owl_transitive, + skos_exact, skos_broader, rdf_value + ])), + [], + 'Reasoning used when evaluating the filter and \c + when adding display information'). +search_option(uri, list(uri), [], + ''). +search_option(remove, list(uri), [], + ''). +search_option(search_type, oneof([literal,concept,backward]), Default, + '') :- + setting(search:search_type, Default). +search_option(expand, zero_or_more, [], + 'When search_type=concept, this determines graph traversal. available values are description, equivalent, narrower, related, rdf_value'). +search_option(prune, boolean, Default, + 'Remove dead-ends from the search-tree') :- + setting(search:prune, Default). +search_option(merge_sameas, boolean, false, + 'Squash networks of equivalent resources'). +search_option(bagify, boolean, false, + 'Combine nodes with the same place in the graph in an RDF bag'). +search_option(abstract, boolean, false, + 'Abstract properties and classes'). + +%% organize_option(?Name, ?Type, ?Default, ?Description) +% +% Parameters that organize the results + +organize_option(start, nonneg, 0, + 'First item that is returned'). +organize_option(end, nonneg, Max, + 'Last item that is returned. When the results \c + are clustered this is per cluster') :- + setting(search:max, Max). +organize_option(sort, atom, Method, + 'Sort the results according to some property Sort \c + is defined as an array with one or more of the \c + used labels or RDF properties') :- + setting(search:sort, Method). +organize_option(groupBy, atom, false, + 'Cluster results by some RDF property'). +organize_option(graph, atom, false, + 'Organize the results in a graph by adding all \c + triples in which the results occurs as a subject, \c + predicate or object'). +organize_option(tree, atom, false, + ''). +organize_option(predicate, atom, _, + ''). +organize_option(lens, atom, false, + ''). +organize_option(src, atom, false, + ''). + +%% display_option(?Name, ?Type, ?Default, ?Description) +% +% Parameters that control the presentation + +display_option(searchPath, atom, Method, + '') :- + setting(search:search_path, Method). +display_option(indent, atom, false, + ''). +display_option(displayLang, atom, en, + ''). % Work in progress towards REST +display_option(display, zero_or_more, [label], + ''). +display_option(subjectDisplay, zero_or_more, [label], + ''). +display_option(predicateDisplay, zero_or_more, [label], + ''). +display_option(objectDisplay, zero_or_more, [label], + ''). +display_option(rdfs_plus_skos, zero_or_more, [], + ''). +display_option(graphOutput, oneof([spo, sop, pso, pos, osp, ops]), pos, + ''). + +/*************************************************** +* handle reply +***************************************************/ + +%% search_api(+Request) +% +% The search API allows you to request a set of resources based on +% keywords, a structured filter or a set of URIs. The resources +% can optionally be organized in clusters or a graph and enriched +% with extra display information. The output format is based on +% the organization: +% +% * no organization: SPARQL JSON serialization +% * groupBy(): similar as the SPARQL JSON serialization, but the +% results are nested in clusters + + +search_api(Request) :- + ( debugging(profile(search)) + -> profile(do_search_api(Request)) + ; do_search_api(Request) + ). + +do_search_api(Request) :- + search_api_parameters(Request, Filter, SearchOpt, OrganizeOpt, DisplayOpt), + find_resources(Filter, Targets, State, SearchOpt), + length(Targets, Count), + organize_resources(Targets, State, Results, OrganizeOpt), + result_display_data(Results, State, ResultData, DisplayOpt), + write_data(ResultData, Count, DisplayOpt). + +%% search_api_parameters(+Request, -Filter, +%% -SearchOpt, -OrganizeOpt, -DisplayOpt) is det. +% +% Extract parameters from HTTP request. + +search_api_parameters(Request, Filter, + SearchOpt, OrganizeOpt, DisplayOpt) :- + http_options(search_option, SearchOpt, SParam), + http_options(organize_option, OrganizeOpt, OParam), + http_options(display_option, DisplayOpt, DParam), + append([SParam, OParam, DParam], Parameters), + http_parameters(Request, Parameters, [form_data(Params)]), + filter_from_parameters(Params, Filter). + + +/*************************************************** +* find resources +***************************************************/ + +%% find_resources(+Filter, -Results:[score-resource], -Graph, +Options) +% +% Results is a list of tuples score-resource. +% There are two types of algorithms to collect the results if there +% is a keyword present in the filter the RDF search graph is traversed, +% otherwise we do a structured query. + +find_resources(Filter, Results, State, Options) :- + ( select(keyword(Query), Filter, Filter1) + -> search_graph(Query, Filter1, Results, State, Options) + ; structured_query(Filter, Results, Options), + State = [] + ). + +%% search_graph(+Query, +Filter, -Results, -SearchState, -Options) +% +% Results is a list of score-resource pairs acquired by +% searching the RDF Graph. SearchState contains the search history +% including the search graph. + +search_graph(Query, Filter, Results, State, Options) :- + basic_filter(Filter, Filter1), % make sure the filter is not empty + graph_search(Query, State, [filter(Filter1)|Options]), + rdf_search_property(State, targets(Results)). + +%% structured_query(+Filter, -Results, +Options) +% +% Results is a list of score-resource pairs that match Filter. +% +% Options are: +% * uris(URIs) URIs are added to the set +% * remove(URIs) URIs are subtracted + +structured_query(Filter, Set, Options) :- + option(max(Max), Options, 100), + option(rdfs_plus_skos(Reasoning), Options, []), + ( Filter \== [], + filter_to_goal(Filter, R, Goal0, Reasoning) + -> % rdf_optimise(Goal0, Goal), + answer_count(R, Goal0, Max, Set0) + ; Set0 = [] + ), + ( option(uri(Add0), Options) + -> sort(Add0, Add), + ord_union(Add, Set0, Set1) + ; Set1 = Set0 + ), + ( option(remove(Remove0), Options) + -> sort(Remove0, Remove), + ord_subtract(Set1, Remove, Set) + ; Set = Set1 + ). + +%update_query((A,B), (B,A)). + + +%% basic_filter(+FilterIn, -FilterOut) +% +% Use constraint on basic search target in case filter is empty. + +basic_filter([], [type(Class)]) :- !, + setting(search:basic_search_target, Class). +basic_filter(Filter, Filter). + + +/*************************************************** +* result organization +***************************************************/ + +%% organize_resources(+Targets, +Graph, -Data, +Options) +% +% Data is a structure containing Targets. +% +% * groupBy(Grouping) +% * + +organize_resources(_Targets, State, triples(Triples), Options) :- + option(graph(graph), Options), !, + search_graph(State, Graph), + search_graph_rdf_graph(Graph, Triples). +organize_resources(Targets, _State, graph(DataGraph), Options) :- + option(graph(Type), Options), + memberchk(Type, [subject,predicate,object]), !, + option(start(Start), Options, 0), + option(end(End), Options, 100), + elem_select(Targets, Start, End, Selected), + targets_to_graph(Selected, Type, DataGraph, Options). +organize_resources(Targets, _State, tree(Tree), Options) :- + option(tree(true), Options), !, + create_tree(Targets, root, Tree, [add_to_root(true),targets(true)]). +/* option(tree(true), Options), !, + search_graph(State, SearchGraph), + option(end(End), Options, 10), + %rdf_equal(flor:isWNSubClass, Rel), + rdf_equal(skos:broader, Rel), + target_to_nodes(Targets, Rel, SearchGraph, Nodes0), + tree_abstract(Nodes0, End, Nodes), + nodes_to_tree(Nodes, Tree). + */ +organize_resources(Targets, State, clusters(Clusters), Options) :- + option(groupBy(concept), Options), + State \== [], !, + option(start(Start), Options, 0), + option(end(End), Options, 100), + search_graph(State, SearchGraph), + concepts_in_search_state(State, Concepts), + tree_abstract(Concepts, 10, Nodes0), + key_rank(Nodes0, reverse, Nodes), + group_items_by_nodes(Nodes, Targets, SearchGraph, Clusters0), + cluster_select(Clusters0, Start, End, Clusters). +organize_resources(Targets, State, clusters(Clusters), Options) :- + option(groupBy(GroupBy), Options), + GroupBy \== false, + State \== [], !, + option(start(Start), Options, 0), + option(end(End), Options, 100), + option(sort(SortMethod), Options, false), + search_graph(State, SearchGraph), + group_targets_by_value(GroupBy, Targets, SearchGraph, Clusters0, Options), + sort_cluster_pairs(Clusters0, SortMethod, SearchGraph, Clusters1), + cluster_select(Clusters1, Start, End, Clusters). +organize_resources(Targets, _State, Data, Options) :- + option(start(Start), Options, 0), + option(end(End), Options, 100), + elem_select(Targets, Start, End, Data). + +search_graph(State, Graph) :- + rdf_search_property(State, graph(Graph)), !. +search_graph(_, []). + +%% group_items_by_value(+GroupBy, +Targets, +SearchGraph, -Clusters, +Options) +% +% Clusters is a list of pairs with key is a value of GroupBy +% and the value a list of corresponding elements from Items. +group_targets_by_value(role, Targets, Graph, Clusters, _Options) :- !, + result_roles(Targets, Graph, Pairs0), + keysort(Pairs0, Pairs), + group_pairs_by_key(Pairs, Clusters). +group_targets_by_value(GroupBy, Targets, Graph, Clusters, Options) :- + ( memberchk(GroupBy, [path,spath]) + -> setting(search:search_path, Default), + option(searchPath(Method), Options, Default), + result_paths(Targets, Method, GroupBy, Graph, Pairs0) + ; property_value(Targets, GroupBy, Pairs0) + ), + keysort(Pairs0, Pairs), + group_pairs_by_key(Pairs, Clusters). + +%% result_paths(+Targets:score-resource, +Method, +Abstract, +Graph, +%% -Items:item(Resource,Score,Path)) is det. +% +% Add path between resource and query. + +result_paths(Targets, Method, Abstract, Graph, Items) :- + empty_path_cache(Cache), + cached_result_paths(Targets, Method, Abstract, Graph, Cache, _, Items). + +cached_result_paths([], _, _, _, Cache, Cache, []). +cached_result_paths([Target|T], Method, Abstract, Graph, CacheIn, CacheOut, [Pair|Pairs]) :- + target(Target, URI, _Score), + ( cached_search_path(Method, URI, Graph, CacheIn, CacheTmp, Path0), + abstract_path(Abstract, Path0, Path) + -> Pair = Path-(Target-Path0) + ; Pair = other-Target + ), + cached_result_paths(T, Method, Abstract, Graph, CacheTmp, CacheOut, Pairs). + +%% abstract_path(+Type, +Path, -Abstract) is det. +% +% * Type = path +% only abstract target +% +% * Type = spath +% abstract full path to schema level + +abstract_path(path, [R|Rest0], Path) :- + iface_abstract_class(R, Class), !, + strip_rdf_value([Class|Rest0], Path0), + partial_schema_path(Path0, Path). +abstract_path(spath, Path, SPath) :- !, + ( Path = [R] + -> iface_abstract_class(R, Class), + SPath = [Class] + ; strip_alignment(Path, Path1), + schema_path(Path1, SPath0), + canonical_path(SPath0, SPath) + ). + +%% result_roles(+Targets, +Graph, -Pairs) +% +% Key is the highest ranked predicate attached to target. + +result_roles([], _, []). +result_roles([Target|T], Graph, [Key-R|Ps]) :- + target(Target, R, _), + findall(Weight-P, + ( search_graph_rdf(Graph, R, P, V), + rdf_cluster:step_weight(P, V, Graph, Weight) + ), + Ps0 + ), + keysort(Ps0, Ps1), + reverse(Ps1, [_Score-P0|_]), + iface_abstract_predicate(P0, Key), + result_roles(T, Graph, Ps). + +%% property_value(+Targets, +P, -Pairs) is det. +% +% Pairs is a pair with the groupBy value and the result target. + +property_value([], _, []). +property_value([Target|T], P, [Value-Target|Rest]) :- + target(Target, URI, _), + ( atom(P), + rdf_has(URI, P, Value) + -> true + ; Value = other + ), + property_value(T, P, Rest). + +%% sort_clusters(+Clusters, +Method, +Graph, -Sorted) +% +% Sorted contains the clusters sorted by Method. + +sort_cluster_pairs(Pairs, false, _Graph, Pairs) :- !. +sort_cluster_pairs(Pairs, Method, Graph, Sorted) :- + clusters_add_sort_key(Pairs, Method, Graph, Keyed), + key_rank(Keyed, reverse, Sorted). + +clusters_add_sort_key([], _, _, []). +clusters_add_sort_key([C-Rs|T], Method, Graph, [Key-(C-Rs)|Rest]) :- + ( cluster_sort_key(C, Method, Graph, Key) + -> true + ; Key = 0 + ), + clusters_add_sort_key(T, Method, Graph, Rest). + +/* +cluster_sort_key(R, _, Graph, Score) :- + atom(R), + search_graph_node_score(Graph, R, Score), + !. +*/ +cluster_sort_key(P, score, _, Score) :- + atom(P),!, + ( graph_search:predicate_weight(P, Score) + -> true + ; Score = 0.2 + ). +cluster_sort_key([_|Path], score, _, Score) :- !, + path_list_score(Path, Score). +cluster_sort_key(Path, _, _, Score) :- + is_list(Path), !, + length(Path, Score). +cluster_sort_key(_, _, _, 0). + + +path_list_score([], 1). +path_list_score([P,_|T], Score) :- + ( graph_search:predicate_weight(P, Weight) + -> true + ; Weight = 0.2 + ), + path_list_score(T, Score0), + Score is Weight*Score0. + + +%% targets_to_graph(+Targets, +GraphType, -Graph, +Options) +% +% Graph contains all triples were resources occur as +% subject;predicate;object + +targets_to_graph(Targets, Type, Triples, Options) :- + maplist(target, Targets, URIs, _), + ( Type == subject + -> iface_subject_graph(URIs, Triples, Options) + ; Type == object + -> iface_object_graph(URIs, Triples, Options) + ; Type == predicate + -> iface_predicate_graph(URIs, Triples, Options) + ). + + +%% cluster_select(+Clusters, +Start, +End, -ReducedClusters) +% +% Reduced Clusers only contains the items in Cluster starting at Start +% and ending at End. + +cluster_select([], _, _, []). +cluster_select([C-Elems|T], Start, End, [C-Reduced|Rest]) :- + elem_select(Elems, Start, End, Reduced), + cluster_select(T, Start, End, Rest). + + +%% target(+Target, -URI, -Score, -Path). +%% target(+Target, -URI, -Score). +% +% Split up target. + +target(Score-URI-Path, URI, Score, Path) :- !. +target(Score-URI, URI, Score, _) :- !. +target(URI, URI, 1, _). + +target(Score-URI, URI, Score) :- !. +target(URI, URI, 1). + + +/*************************************************** +* display data +***************************************************/ + +%% result_display_data(+Results, -Data, +Options) +% +% Data is Results enriched with display information. +% Options are: +% * + +result_display_data(graph(Graph), _, graph(Graph, Display), Options) :- !, + graph_display_data(Graph, Display, Options). +result_display_data(tree(Tree), _, tree(Tree), _) :- !. +result_display_data(Results, State, Data, Options) :- !, + option(display(Ps), Options, []), + option(searchPath(Path), Options, false), + search_graph(State, Graph), + empty_path_cache(Cache), + ( Results = clusters(Clusters0) + -> Data = clusters(Clusters), + cluster_data(Clusters0, Ps, Path, Graph, Cache, _, Clusters) + ; result_data(Results, Ps, Path, Graph, Cache, _, Data) + ). + + +%% result_data(+Results, +Ps, +SearchPath, +Graph, +%% +CacheIn, -CacheOut, -Items) +% +% Items contains the Results enriched with the +% property-value pairs as defined in Ps and a search path description. + +result_data([], _, _, _, Cache, Cache, []) :- !. +result_data([Target|Rs], Ps, Path, Graph, CacheIn, CacheOut, + [item(R,S,P,Data)|Items]) :- + target(Target, R, S, P0), + result_path(Path, R, P0, Graph, CacheIn, CacheTmp, P), + iface_resource_properties(R, Ps, Data), + result_data(Rs, Ps, Path, Graph, CacheTmp, CacheOut, Items). + +result_path(false, _, _, _, Cache, Cache, []) :- !. +result_path(_, _, P, _, Cache, Cache, P) :- + nonvar(P), !. +result_path(_, _, _, [], Cache, Cache, []) :- !. +result_path(Method, R, _, Graph, CacheIn, CacheOut, Path) :- + ( Method == abstract + -> cached_search_path(best, R, Graph, CacheIn, CacheOut, P0), + abstract_path(spath, P0, Path) + ; cached_search_path(Method, R, Graph, CacheIn, CacheOut, Path) + ). + +%% cluster_data(+ClustersIn, +Ps, +SearchPath, +Graph, +%% +CacheIn, -CacheOut, -ClustersOut) +% +% ItemsOut contains the items from ItemsIn enriched with the +% property-value pairs as defined in Ps. + +cluster_data(Clusters0, Ps, Path, Graph, Cache, CacheTmp, Clusters) :- + cluster_data(Clusters0, Ps, Path, Graph, Cache, CacheTmp, Clusters1, Other), + ( Other = [] + -> Clusters = Clusters1 + ; flatten(Other, Items), + rdf_display_label(other, Label), + result_data(Items, Ps, Path, Graph, CacheTmp, _, ItemData), + append(Clusters1, [cluster(Label,other,ItemData)], Clusters) + ). + +cluster_data([], _, _, _, Cache, Cache, [], []) :- !. +cluster_data([other-Targets|Pairs], Ps, Path, Graph, CacheIn, CacheOut, Cs, [Targets|Other]) :- !, + cluster_data(Pairs, Ps, Path, Graph, CacheIn, CacheOut, Cs, Other). +cluster_data([Group-Targets|Pairs], Ps, Path, Graph, CacheIn, CacheOut, [Cluster|Cs], Other) :- + cluster_label(Group, Label), !, + Cluster = cluster(Label, Group, Items), + result_data(Targets, Ps, Path, Graph, CacheIn, CacheTmp, Items), + cluster_data(Pairs, Ps, Path, Graph, CacheTmp, CacheOut, Cs, Other). +cluster_data([_-Targets|Pairs], Ps, Path, Graph, CacheIn, CacheOut, Cs, [Targets|Other]) :- + cluster_data(Pairs, Ps, Path, Graph, CacheIn, CacheOut, Cs, Other). + + +%% cluster_label(+ClusterType, +Cluster, -Label) +% +% Label is a textual description for Cluster. + +cluster_label(more(R), Label) :- !, + iface_label(R, L), + atom_concat('other ', L, Label). +cluster_label(R, Label) :- + atom(R), !, + iface_label(R, Label). +%cluster_label(Path, PathTxt) :- +% path_txt_description(Path, PathTxt), !. + +/*************************************************** +* json output +***************************************************/ + +output_warning(json, Message) :- + Results = json([bindings=[]]), + reply_json(json([results=Results, data=[], warning=Message])). + + +%% write_data(+Data, +Options) +% +% Write data in JSON format to output stream. + +write_data(clusters(Clusters), _, Options) :- !, + Head = json([vars=[uri,score,path|Ps]]), + Results = json([clusters=Bindings]), + JSON = json([head=Head, results=Results]), + option(display(Ps), Options, []), + clusters_to_json(Clusters, Bindings), + reply_indent_json(JSON, Options). +write_data(graph(Graph, Display), _, Options) :- !, + JSON = json([graph=JSONGraph, display=JSONDisplay]), + option(graphOutput(Type), Options, pos), + graph_to_json(Type, Graph, JSONGraph, []), + graph_to_json(spo, Display, JSONDisplay, []), + reply_indent_json(JSON, Options). +write_data(tree(Tree), _, Options) :- + option(display(Ps), Options, []), + tree_to_json(Tree, Ps, JSONTree), + JSON = json([result=JSONTree + ]), + reply_indent_json(JSON, Options). +write_data(Items, Total, Options) :- + Head = json([totalResultsAvailable=Total, + vars=[uri,score,path|Ps] + ]), + Results = json([bindings=Bindings]), + JSON = json([head=Head, results=Results]), + option(display(Ps), Options, []), + items_to_json(Items, Bindings), + reply_indent_json(JSON, Options). + +reply_indent_json(JSON, Options) :- + option(indent(true), Options), !, + reply_json(JSON). +reply_indent_json(JSON, _) :- + reply_json(JSON, [width(0)]). + +%% items_to_json(+Items, -JSON) +% +% JSON contain a prolog json term for each elemeent from Items. + +items_to_json([], []) :- !. +items_to_json([item(URI,Score,Path,Info)|Items], [Row|Rows]) :- + path_label(Path, PathLabel), + Row = json([uri=URI,score=Score,path=PathLabel|Info]), + items_to_json(Items, Rows). + +%% items_to_json(+Items, -JSON) +% +% JSON contain a prolog json term for each elemeent from Items. + +clusters_to_json([], []). +clusters_to_json([cluster(Label, Group, Items)|T], [JSON|Rest]) :- + cluster_to_json(Group, GroupJSON), + JSON = json([label=Label, path=GroupJSON, items=ItemsJSON]), + items_to_json(Items, ItemsJSON), + clusters_to_json(T, Rest). + +cluster_to_json(more(R), [more,R]) :- !. +cluster_to_json(R, R). + +%% path_label(+ResourceList, -LabelList) +% +% LabelList contains all labels of ResourceList. + +path_label(Ps, Ls) :- + is_list(Ps), !, + path_label_list(Ps, Ls). +path_label(R, L) :- + atom(R), !, + iface_label(R,L). + +path_label_list([], []). +path_label_list([R|Rs], [L|Ls]) :- + iface_label(R, L), + path_label_list(Rs, Ls). + + + +%% elem_select(+List, +Start, +End, -SubList) +% +% SubList contains the elements from List starting at Start element +% up to the End element. + +elem_select(Items, Start, End , Items) :- + Start > End, !. +elem_select(Items, 0, End, Selected) :- !, + length(Items, Total), + ( Total =< End + -> Selected = Items + ; length(Selected, End), + append(Selected, _, Items) + ). +elem_select(Items, Start, End, Selected) :- + length(Items, Total), + length(L0, Start), + append(L0, Rest, Items), + ( End >= Total + -> Selected = Rest + ; Length is End - Start, + length(Selected, Length), + append(Selected, _, Rest) + ). + diff --git a/config-available/cluster_search.pl b/config-available/cluster_search.pl index 1f29429..2f93b19 100644 --- a/config-available/cluster_search.pl +++ b/config-available/cluster_search.pl @@ -3,3 +3,4 @@ /** <module> search based on clustering paths in the graph */ +:- use_module(api(cluster_search)). diff --git a/lib/cluster_search/filter.pl b/lib/cluster_search/filter.pl new file mode 100644 index 0000000..68bc57a --- /dev/null +++ b/lib/cluster_search/filter.pl @@ -0,0 +1,352 @@ +/* This file is part of ClioPatria. + + Author: + HTTP: http://e-culture.multimedian.nl/ + GITWEB: http://gollem.science.uva.nl/git/ClioPatria.git + GIT: git://gollem.science.uva.nl/home/git/ClioPatria.git + GIT: http://gollem.science.uva.nl/home/git/ClioPatria.git + Copyright: 2007, E-Culture/MultimediaN + + ClioPatria is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + ClioPatria is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with ClioPatria. If not, see <http://www.gnu.org/licenses/>. +*/ + +:- module(filter, + [ target_goal/3, % +Goal, +R, +Options + filter_to_goal/4, % +FilterList, +R, -Goal, +Options + + filter_from_parameters/2, % +Options, -Filter + filter_from_parameters/3, % +Options, -Filter, -Rest + filter_to_parameter/2, % +FilterList, -JSON + filter_to_json/2, + json_filter_to_prolog/2 + ]). + +:- use_module(library(semweb/rdf_db)). +:- use_module(library(semweb/rdfs)). +:- use_module(library(semweb/owl)). +:- use_module(library(semweb/rdf_label)). + +:- use_module(library(debug)). +:- use_module(library(error)). +:- use_module(library(option)). +:- use_module(library(pairs)). +:- use_module(library(lists)). +:- use_module(library(assoc)). +:- use_module(library(settings)). + +:- use_module(rdf_graph). +:- use_module(rdf_search). +:- use_module(graph_search). +:- use_module(rdfs_plus_skos). + +:- use_module(library(http/json_convert)). +:- use_module(library(http/json)). + + +%% target_goal(+Goal, +R, -URI) +% +% Succeeds if R passes all Filters. + +target_goal(Goal, R, R) :- + Goal, !. + +%% filter_to_goal(+Filter, +R, -Goal, +Options) +% +% Goal is a prolog goal of Filter that succeeds if R passes all filters. +% Allowed filters: +% +% * keyword(Keyword) +% * type(Class) +% * prop(Prop, Value) +% * reachable(Prop, TransitiveProp, Concept) +% +% @see Options are described with rdfs_plus_skos/5 + +filter_to_goal(Filter, R, Goal, Options) :- + resource_ext_map(Options, RMap), + query_ext_map(Options, QMap), + filter_to_goal(Filter, R, RMap, QMap, Goal, Options). + +filter_to_goal(Filter, R, RMap, QMap, Goal, Options) :- + is_list(Filter), !, + ( Filter = [] + -> Goal = true + ; select(keyword(Keyword), Filter, Filter1) + -> search_filter(Filter1, SearchFilter), + graph_search(Keyword, State, [filter(SearchFilter)|Options]), + rdf_search_property(State, state_targets(Assoc)), + Goal = gen_assoc(R, Assoc, _) + ; filter_to_conj_goal(Filter, R, RMap, QMap, Goal) + ). +filter_to_goal(Filter, R, RMap, QMap, Goal, _) :- + filter(Filter, R, RMap, QMap, Goal). + +filter_to_goal(Filter, R, RMap, QMap, Goal) :- + filter(Filter, R, RMap, QMap, Goal). + +search_filter([], [type(Class)]) :- !, + setting(search:basic_search_target, Class). +search_filter(Filter, Filter). + +%% filter_to_conj_goal(+FilterList, +R, -Goal) +% +% Goal is conjuctive prolog goal for FilterList. + +filter_to_conj_goal([Filter], R, RMap, QMap, Goal) :- !, + filter(Filter, R, RMap, QMap, Goal). +filter_to_conj_goal([Filter|T], R, RMap, QMap, (Goal,Rest)) :- + filter(Filter, R, RMap, QMap, Goal), + filter_to_conj_goal(T, R, RMap, QMap, Rest). + +%% filter_to_conj_goal(+FilterList, +R, RMap, QMap, -Goal) +% +% Goal is disjunctive prolog goal for FilterList. + +filter_to_disj_goal([Filter], R, RMap, QMap, Goal) :- !, + filter(Filter, R, RMap, QMap, Goal). +filter_to_disj_goal([Filter|T], R, RMap, QMap, (Goal;Rest)) :- + filter(Filter, R, RMap, QMap, Goal), + filter_to_disj_goal(T, R, RMap, QMap, Rest). + +%% filter(+Filter, -Goal, ?R) +% +% Goal is a prolog goal that succeeds if R passes Filter. + +filter(or(Filter), R, RMap, QMap, Goal) :- !, + filter_to_disj_goal(Filter, R, RMap, QMap, Goal). +filter(or(F1, F2), R, RMap, QMap, Goal) :- !, + Goal = (G1 ; G2), + filter_to_goal(F1, R, RMap, QMap, G1), + filter_to_goal(F2, R, RMap, QMap, G2). +filter(type(Class), R, _, _, Goal) :- !, + ( rdf_equal(Class,rdfs:'Resource') + -> Goal = true + ; rdf_equal(Type, rdf:type), + rdf_equal(SubClass, rdfs:subClassOf), + Goal = ( rdf(R,Type,C), + rdf_reachable(C,SubClass,Class) + ) + ). +filter(prop(P, V), R, RMap, QMap, Goal) :- !, + ( P = all + -> Goal = rdfs_plus_skos(RMap,QMap, R, _, V) + ; Goal = rdfs_plus_skos(RMap,QMap, R, P, V) + ). +filter(propsearch(P, Search), R, RMap, QMap, Goal) :- !, + kwd_search:find_literals(Search, Literals, []), + filter(prop(P, V), R, RMap, QMap, Goal0), + findall(O, ( member(_-L,Literals), + ( O = literal(L) + ; rdfs_plus_skos(RMap,QMap, O, rdfs:label, literal(L)) + ) + ), + Vs), + Goal = ( member(V,Vs), + Goal0 + ). +filter(reachable(TransP, C), R, _, _, Goal) :- !, + Goal = rdf_reachable(R, TransP, C). +filter(reachable(P, TransP, C), R, RMap, QMap, Goal) :- !, + Goal = ( rdfs_plus_skos(RMap,QMap, R, P, V), + rdf_reachable(V, TransP, C) + ). +filter(value(V), R, RMap, QMap, Goal) :- !, + Goal = rdfs_plus_skos(RMap,QMap, R, _, V). +filter(valueOfProp(P), R, RMap, QMap, Goal) :- !, + Goal = rdfs_plus_skos(RMap,QMap, _, P, R). +filter(valueOfProp(P, Filter), R, RMap, QMap, Goal) :- !, + Goal = ( rdfs_plus_skos(RMap,QMap, S, P, R), + Rest + ), + filter_to_goal(Filter, S, RMap, QMap, Rest). +filter(metadata(Class), R, RMap, QMap, Goal) :- !, + rdf_equal(Type, rdf:type), + rdf_equal(SubClass, rdfs:subClassOf), + Goal = ( rdf(R, Type, C), + rdf_reachable(C, SubClass, Class) + ; rdfs_plus_skos(RMap,QMap, S,_,R), + rdf(S, Type, C), + rdf_reachable(C, SubClass, Class) + ). +filter(metaclass(MetaClass), R, _, _, Goal) :- !, + rdf_equal(Type, rdf:type), + rdf_equal(SubClass, rdfs:subClassOf), + Goal = ( rdf(R, Type, C), + rdf_reachable(C, SubClass, Class), + rdf(Class, Type, MetaClass) + ). +filter(equal(R1), R, _, _, Goal) :- !, + Goal = rdf_equal(R1, R). +filter(ns(Ns), R, _, _, Goal) :- !, + Goal = ( ground(R+Ns) + -> sub_atom(R, _, _, _, Ns) + ; rdf_url_namespace(R, Ns) + ). +filter(alias(Alias), R, _, _, Goal) :- !, + Goal = rdf_global_id(Alias:_, R). +/* +filter(group(P, [Value]), R, _, _, Goal) :- !, + ( Value == other + -> Goal = (\+ iface_has(0, 0, R, P, Value, _)) + ; Goal = iface_has(0, 0, R, P, Value, _) + ). +*/ +filter(owl_satisfies(Range), R, _, _, Goal) :- !, + Goal = owl_satisfies(Range, R). +filter(Filter, _, _, _, _) :- + domain_error(filter, Filter). + + +%% filter_from_parameters(+OptionList, -Filter) is det. +%% filter_from_parameters(+OptionList, -Filter, -Rest) is det. +% +% Filter is a list of filters that occur in the OptionList + +filter_from_parameters(Options, Filter) :- + filter_from_parameters(Options, Filter, _). + +filter_from_parameters(Options, Filter, Rest) :- + filters(Options, Filter0, Rest), + filter_value_to_literal(Filter0, Filter). + +%% json_filter_to_prolog(+JSONAtom, -Filter) +% +% Filter is a prolog term representation of the filter in +% JSONAtom. + +json_filter_to_prolog(List, Filter) :- + list_atom_json_to_term(List, AtomList), + json_to_prolog(AtomList, Filter0), + filter_value_to_literal(Filter0, Filter). + +list_atom_json_to_term([], []). +list_atom_json_to_term([Atom|As], [Term|Ts]) :- + atom_json_term(Atom,Term,[]), + list_atom_json_to_term(As, Ts). + + +%% filters(+Options, -Filters, -Rest) +% +% Directly takes some filters that are defined in +% filter_parameter/2 from the OptionList. +% +% @tbd Remove old style if SWI-Prolog 5.7.14 is current + +filters([], [], []). +filters([O|Os], Filters, Rest) :- + ground(O), % non-ground are optional parameters + filter_parameter(O, F), !, + ( is_list(F) + -> append(F, Fs, Filters) + ; Filters = [F|Fs] + ), + filters(Os, Fs, Rest). +filters([O|Os], Fs, [O|Rest]) :- + filters(Os, Fs, Rest). + +% New style for post-processing after http_parameters +filter_parameter(filter(List), List). +filter_parameter(query(Keyword), keyword(Keyword)). +filter_parameter(type(Class), type(Class)). +filter_parameter(ns(NS), ns(NS)). +filter_parameter(alias(Alias), alias(Alias)). +% old style for using the raw form-data +filter_parameter(filter=Atom, Term) :- + atom_json_term(Atom,JSON,[]), + json_to_prolog(JSON, Term). +filter_parameter(query=Query, keyword(Query)) :- !. +filter_parameter(type=Class, type(Class)) :- !, + Class \== ''. +filter_parameter(ns=Ns, ns(Ns)) :- !. +filter_parameter(alias=Alias, alias(Alias)) :- !. +filter_parameter(Prop=Value, prop(Prop,Value)) :- + rdf_current_predicate(Prop). + + +%% filter_value_to_literal(+FilterIn, -FilterOut) +% +% Replaces values with literal terms, in case the value is not a URI. + +filter_value_to_literal([], []). +filter_value_to_literal([Filter0|Fs], [Filter|Rest]) :- + ( Filter0 = prop(Prop,A) + -> property_atom_to_literal(A, Value), + Filter = prop(Prop,Value) + ; Filter = Filter0 + ), + filter_value_to_literal(Fs, Rest). + +property_atom_to_literal(A, V) :- + ( rdf_subject(A) + -> V = A + ; V = literal(A) + ). + + +%% filter_to_parameter(+Filter, -JSON) +% +% JSON is a prolog JSON term from Filter. +% Uses json_object declarations. + +filter_to_parameter([], []). +filter_to_parameter([Filter|T], [Object|Rest]) :- + Object = json([filter=JSONFilter, label=Label]), + filter_label(Filter, Label), + prolog_to_json(Filter, JSONFilter), + filter_to_parameter(T, Rest). + +%% filter_to_parameter(+Filter, -JSON) +% +% JSON is a prolog JSON term from Filter. +% Uses json_object declarations. + +filter_to_json([], []). +filter_to_json([Filter|T], [JSONFilter|Rest]) :- + prolog_to_json(Filter, JSONFilter), + filter_to_json(T, Rest). + +%% filter_label(+Filter, -Label) +% +% Label is a human readable description of filter. + +/* filter_label(keyword(A), L) :- !, + interface_label(keyword, Keyword), + atomic_list_concat([Keyword, ':', A], L). +*/ +filter_label(type(Class), L) :- !, + rdf_display_label(Class, L). +filter_label(_Term, 'test'). + + + +:- json_object + or(or:_), + type(type:atom), + metaclass(metaclass:atom), + keyword(keyword:atom), + metadata(metadata:atom), + metadataOf(metadataOf:atom), + prop(prop:atom, object:_), + prop(prop:atom, uri:atom), + propsearch(prop:atom, text:atom), + prop(prop:atom), + value(object:atom), + valueOfProp(valueOfProp:atom), + reachable(rel:atom, object:atom), + reachable(prop:atom, rel:atom, uri:atom), + group(cluster:atom, values:list), + + literal(value:_) + [type=literal], + type(type:atom, literal:atom), + lang(lang:atom, literal:atom). diff --git a/lib/cluster_search/fuzzy.pl b/lib/cluster_search/fuzzy.pl new file mode 100644 index 0000000..b01d46a --- /dev/null +++ b/lib/cluster_search/fuzzy.pl @@ -0,0 +1,293 @@ +/* This file is part of ClioPatria. + + Author: + HTTP: http://e-culture.multimedian.nl/ + GITWEB: http://gollem.science.uva.nl/git/ClioPatria.git + GIT: git://gollem.science.uva.nl/home/git/ClioPatria.git + GIT: http://gollem.science.uva.nl/home/git/ClioPatria.git + Copyright: 2007, E-Culture/MultimediaN + + ClioPatria is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + ClioPatria is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with ClioPatria. If not, see <http://www.gnu.org/licenses/>. +*/ + +:- module(fuzzy, + [ stem_literals/0, + fuz_find_literal/3, % +Spec, +MatchType, -Literal + stem_find_literal/2, % +Spec, -Literal + literal_distance/3 % +L1, +L2, -D + ]). +:- use_module(library('semweb/rdf_db')). +:- use_module(library('semweb/rdf_litindex')). +:- use_module(library('http/dcg_basics')). +:- use_module(library(debug)). +:- use_module(library(lists)). +:- use_module(library(find_resource)). +:- use_module(library(porter_stem)). + +/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +This module finds literals of the RDF database based on stemming and +being flexible to ordering of tokens. +- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ + +%% stem_literals +% +% Create stemming database. As we now use library(rdf_litindex), +% which generates and maintains the database on the fly we simply +% do a call that forces initialization of the database. + +stem_literals :- + rdf_find_literals(stem(foobar), _). + + +%% stem_find_literal(+TextOrTokens, -Literal) +% +% Find literal values that contain all stems from Text. Text is +% either an atom or a list of tokens as produced by +% rdf_tokenize_literal/2. + +stem_find_literal(Text, Literal) :- + find_literal(Text, stem, Literal). + + +%% fuz_find_literal(+TextOrTokens, +MatchType, -Literal) +% +% Find literal values that contain all Text where the +% matching is determined by MatchType. +% Text is either an atom or a list of tokens as produced by +% rdf_tokenize_literal/2. +% MatchType is one of case, stem or prefix. + + +fuz_find_literal(Text, MatchType, Literal) :- + tokens(Text, Tokens), + list_to_and(Tokens, MatchType, And), + rdf_find_literals(And, Literals), + member(Literal, Literals). + + +%% list_to_and(+TextOrTokens, +MatchType, -AndTerm) +% +% AndTerm is a conjunction of terms constructed from +% MatchType and Token. +% +% For performance reasons a conjunction of prefix items +% is not made. Instead only the last conjunct gets a prefix. + +list_to_and([], _, true). +list_to_and([One], MatchType, Match) :- !, + mkmatch(MatchType, One, Match). +list_to_and([H|T], prefix, and(Match, And)) :- !, + mkmatch(stem, H, Match), + list_to_and(T, prefix, And). +list_to_and([H|T], MatchType, and(Match, And)) :- + mkmatch(MatchType, H, Match), + list_to_and(T, MatchType, And). + + +mkmatch(_, Number, Number) :- + number(Number), !. +mkmatch(stem, Token, stem(Token)). +mkmatch(prefix, Token, prefix(Token)). +mkmatch(case, Token, case(Token)). + + /******************************* + * DISTANCE * + *******************************/ + +goal_expansion(forall(C0, A0), \+ (C, \+ A)) :- + expand_goal(C0, C), + expand_goal(A0, A). + + +%% literal_distance(+Lit1, +Lit2, -Distance) +% +% Compute the distance between two literals. Distance values are +% >= 0, where 0 means perfect match. Handicaps are given to +% inserted, deleted, moved and modified tokens. See the above URL +% for a description of the best edit-sequence comparing strings. + +literal_distance(L1, L2, Distance) :- + tokens(L1, TL1), + tokens(L2, TL2), + length(TL1, N1), + length(TL2, N2), + abs(N1-N2) =< min(N1, N2), % too much difference in length + cheapest_edit_path(TL1, TL2, EditCost, _Path), + Distance is EditCost/max(N1,N2). + +tokens(Tokens, Tokens) :- + is_list(Tokens), !. +tokens(Spec, Tokens) :- + atom(Spec), !, + rdf_tokenize_literal(Spec, Tokens). +tokens(Number, [Number]) :- + number(Number). + + +%% cheapest_edit_path(+List1, +List2, -Distance, -Path) +% +% Compute the cheapest edit path. As edit operations are weighted, +% this is not necessarily the shorted one, but the algorithm is +% basically the same. +% +% @see http://www.ling.ohio-state.edu/~cbrew/2002/winter/684.02/string-distance.html + +cheapest_edit_path(Toks1, Toks2, Distance, Path) :- +% DelCost = 10, % Delete token +% InsCost = 8, % Insert token + MatchCost = 0, % Matched token + CaseCost = 1, % Different case + DWIMCost = 3, % Spelling error + StemCost = 5, % Tokens have same stem + SubstCost = 10, % Replaced token + MovCost = 2, % Token is moved + + T1 =.. [string|Toks1], + T2 =.. [string|Toks2], + functor(T1, _, M), + functor(T2, _, N), + X is M + 1, + Y is N + 1, + M1 is M - 1, + N1 is N - 1, + new_array(X, Y, Array), + nb_set_array(Array, 0, 0, c(0, [])), + forall(between(0, M1, I), + ( get_array(Array, I, 0, c(V0, P0)), + I1 is I+1, + arg(I1, T1, D), + del_cost(D, DC), + V is V0 + DC, + nb_link_array(Array, I1, 0, c(V, [del(D)|P0])) + )), + forall(between(0, N1, J), + ( get_array(Array, 0, J, c(V0, P0)), + J1 is J+1, + arg(J1, T2, I), + ins_cost(I, IC), + V is V0 + IC, + nb_link_array(Array, 0, J1, c(V, [ins(I)|P0])) + )), + forall(between(0, M1, I), + forall(between(0, N1, J), + ( I1 is I + 1, + J1 is J + 1, + arg(I1, T1, V1), + arg(J1, T2, V2), + ( V1 == V2 + -> Subst = MatchCost + ; downcase_atom(V1, L), + downcase_atom(V2, L) + -> Subst = CaseCost + ; dwim_match(V1, V2) + -> Subst = DWIMCost + ; same_stem(V1, V2) + -> Subst = StemCost + ; Subst = SubstCost + ), + get_array(Array, I, J, c(C1, P1)), + get_array(Array, I1, J, c(C2, P2)), + get_array(Array, I, J1, c(C3, P3)), + + SubstC is C1 + Subst, + ( memberchk(del(V2), P2) + -> del_cost(V2, DC2), + InsC is C2 - DC2 + MovCost + ; ins_cost(V2, InsCost), + InsC is C2 + InsCost + ), + ( memberchk(ins(V1), P3) + -> ins_cost(V1, IC1), + DelC is C3 - IC1 + MovCost + ; del_cost(V1, DelCost), + DelC is C3 + DelCost + ), + + ( SubstC < InsC + -> ( SubstC < DelC + -> nb_link_array(Array, I1, J1, + c(SubstC, [subst(V1, V2)|P1])) + ; nb_link_array(Array, I1, J1, + c(DelC, [del(V1)|P3])) + ) + ; ( DelC < InsC + -> nb_link_array(Array, I1, J1, + c(DelC, [del(V1)|P3])) + ; nb_link_array(Array, I1, J1, + c(InsC, [ins(V2)|P2])) + ) + )))), +% pp_array(Array), + get_array(Array, M, N, c(Distance, Path0)), + reverse(Path0, Path). + +ins_cost((,), 1) :- !. +ins_cost(_, 8). + +del_cost((,), 1) :- !. +del_cost(_, 10). + +same_stem(T1, T2) :- + atom(T1), atom(T2), !, + porter_stem(T1, Stem), + porter_stem(T2, Stem). + + + /******************************* + * SIMPLE ARRAY PACKAGE * + *******************************/ + +new_array(X, Y, Array) :- + Size is X * Y + 3, + functor(Array, array, Size), + arg(1, Array, 2), % dimensions + arg(2, Array, X), % columns + arg(3, Array, Y). % rows + +get_array(Array, X, Y, Val) :- + arg(2, Array, Cols), + Pos is X + Y*Cols + 4, + arg(Pos, Array, Val). + +nb_set_array(Array, X, Y, Val) :- + arg(2, Array, Cols), + Pos is X + Y*Cols + 4, + nb_setarg(Pos, Array, Val). + +nb_link_array(Array, X, Y, Val) :- + arg(2, Array, Cols), + Pos is X + Y*Cols + 4, + nb_linkarg(Pos, Array, Val). + + /******************************* + * DEBUGGING * + *******************************/ + +end_of_file. + +pp_array(Array) :- + functor(Array, array, _), + arg(1, Array, 2), + arg(2, Array, Cols), + arg(3, Array, Rows), + MaxX is Cols-1, + MaxY is Rows-1, + forall(between(0, MaxY, Y), + ( forall(between(0, MaxX, X), + ( get_array(Array, X, Y, V), + format('~w ', [V]) + )), + nl + )). + diff --git a/lib/cluster_search/graph_search.pl b/lib/cluster_search/graph_search.pl new file mode 100644 index 0000000..319780f --- /dev/null +++ b/lib/cluster_search/graph_search.pl @@ -0,0 +1,189 @@ +/* This file is part of ClioPatria. + + Author: + HTTP: http://e-culture.multimedian.nl/ + GITWEB: http://gollem.science.uva.nl/git/ClioPatria.git + GIT: git://gollem.science.uva.nl/home/git/ClioPatria.git + GIT: http://gollem.science.uva.nl/home/git/ClioPatria.git + Copyright: 2007, E-Culture/MultimediaN + + ClioPatria is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + ClioPatria is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with ClioPatria. If not, see <http://www.gnu.org/licenses/>. +*/ + +:- module(graph_search, + [ graph_search/3, % +Query, -RDFState, +Options + flush_graph_search_cache/0 + ]). + +:- use_module(library(semweb/rdf_db)). +% :- use_module(library(semweb/owl')). +:- use_module(library(debug)). +:- use_module(library(option)). +:- use_module(library(pairs)). +:- use_module(library(lists)). +:- use_module(library(assoc)). +:- use_module(library(settings)). + +% util modules +:- use_module(rdf_graph). +:- use_module(rdf_search). +:- use_module(rdf_direct_search). +:- use_module(rdf_hierarchy_search). +:- use_module(rdf_backward_search). +:- use_module(rdf_full_search). +:- use_module(filter). + + +/*************************************************** +* Settings +***************************************************/ + +:- setting(search:graphSearchCache, boolean, true, + 'Cache graph search results'). +:- setting(search:graphSearchCacheSize, nonneg, 50, + 'Maximum searches kept in cache (0 is unbound)'). + +/*************************************************** +* graph search +***************************************************/ + +%% graph_search(+Keyword, -RDFState, +Options) is det. +% +% Start search from Keyword. Options is a list of: +% +% * steps(+Integer) +% Max number of nodes to expand from the agenda +% +% * prune(+Boolean) +% If =true= (default), prune all failed paths from +% the graph. +% +% * threshold(+Threshold) +% Search cut-off threshold +% +% @tbd Merge with find/3 from biggraphs.pl + +:- rdf_meta + graph_search(+, -, t). +:- dynamic + graph_search_cache/4, % +Hash, +Kwd, +Options, -State + cache_generation/1. + +graph_search(Kwd, State, Options) :- + setting(search:graphSearchCache, false), !, + do_graph_search(Kwd, State, Options). +graph_search(Kwd, State, Options) :- + check_cache_validity, + term_hash(Kwd+Options, Key), + ( graph_search_cache(Key, Kwd, Options, State) + -> true + ; do_graph_search(Kwd, State, Options), + push_graph_search_cache(Key, Kwd, Options, State) + ). + +%% check_cache_validity is det. +% +% Remove the cache if it is not valid. We use rdf_generation/1 for +% this. This means any change to the RDF database invalidates the +% cache. Ideally we should restrict this to named graphs used in +% the search, but that is not easy to define. + +check_cache_validity :- + cache_generation(CacheGen), + rdf_generation(CacheGen), !. +check_cache_validity :- + rdf_generation(CacheGen), + retractall(cache_generation(_)), + assert(cache_generation(CacheGen)), + flush_graph_search_cache. + + +%% flush_graph_search_cache is det. +% +% Flushes the cache for graph_search/3. + +flush_graph_search_cache :- + retractall(graph_search_cache(_,_,_,_)). + +%% push_graph_search_cache(+Key, +Keyword, +Options, -State) is det. +% +% Add a result to the search cache. Note that we first check +% whether whether the result was added to avoid the not so +% uncommon case that two threads compute the same result +% concurrently. + +push_graph_search_cache(Key, Kwd, Options, State) :- + graph_search_cache(Key, Kwd, Options, State), !. +push_graph_search_cache(Key, Kwd, Options, State) :- + setting(search:graphSearchCacheSize, Max), + ( Max == 0 + -> true + ; predicate_property(graph_search_cache(_,_,_,_), + number_of_clauses(Size)), + Size > Max + -> once(retract(graph_search_cache(_,_,_,_))) + ; true + ), + assertz(graph_search_cache(Key, Kwd, Options, State)). + + +do_graph_search(Kwd, State, Options) :- + setting(search:search_type, Default), + option(filter(Filter0), Options, []), + filter:search_filter(Filter0, Filter), + option(search_type(Type), Options, Default), + filter_to_goal(Filter, R, Goal, Options), + TargetCond = target_goal(Goal, R), + ( Type == concept + -> rdf_concept_search(Kwd, TargetCond, State, Options) + ; Type == literal + -> rdf_literal_search(Kwd, TargetCond, State, Options) + ; rdf_backward_search(Kwd, TargetCond, State, Options) + ), + ( option(prune(true), Options) + -> prune(State, []) + ; option(prune(full), Options) + -> prune(State, [recursive]) + ; true + )/*, + ( search_graph_filter_option(Options) + -> rdf_search_property(State, graph(Graph0)), + search_graph_rdf_graph(Graph0, RDF0), + big_graphs:filter_rdf(RDF0, RDF, Options) + %search_graph_rdf_graph(Graph, RDF), + %rdf_search:set_graph_of_state(Graph, State) + ; true + )*/. + + +predicate_weight(P, W) :- % hack + rdf_backward_search:predicate_weight(P, W). + + +%% prune(!State) is det. +% +% Prune paths from the graph that do not end in a target. + +prune(State, Options) :- + rdf_prune_search(State, Options), + rdf_search_property(State, graph_size(Nodes)), + debug(rdf_search, 'After prune: ~D nodes in graph', [Nodes]). + + +search_graph_filter_option(Options) :- + memberchk(merge_sameas(true), Options). +search_graph_filter_option(Options) :- + memberchk(bagify(true), Options). +search_graph_filter_option(Options) :- + memberchk(abstract(true), Options). diff --git a/lib/cluster_search/graph_util.pl b/lib/cluster_search/graph_util.pl new file mode 100644 index 0000000..0e7ce36 --- /dev/null +++ b/lib/cluster_search/graph_util.pl @@ -0,0 +1,100 @@ +/* This file is part of ClioPatria. + + Author: + HTTP: http://e-culture.multimedian.nl/ + GITWEB: http://gollem.science.uva.nl/git/ClioPatria.git + GIT: git://gollem.science.uva.nl/home/git/ClioPatria.git + GIT: http://gollem.science.uva.nl/home/git/ClioPatria.git + Copyright: 2007, E-Culture/MultimediaN + + ClioPatria is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + ClioPatria is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with ClioPatria. If not, see <http://www.gnu.org/licenses/>. +*/ + + +:- module(graph_util, + [ graphs_unique_elements/5, % +Graphs, -Ss, -Ps, -Os, -Srcs + graph_unique_elements/5, % +Graph, -Ss, -Ps, -Os, -Srcs + graph_display_data/3 % +Graph, -DisplayGraph, +Options + ]). + +:- use_module(iface_util). +:- use_module(rdfs_plus_skos). + +%% graph_display_data(+Graph, -DisplayGraph, +Options) +% +% DisplayGraph contains triples describing the subj,pred,obj. + +graph_display_data(Graph, DisplayGraph, Options) :- + option(rdfs_plus_skos(Reasoning), Options), + resource_ext_map(Reasoning, EMap), + query_ext_map(Reasoning, QMap), + list_option(subjectDisplay(SP), Options, [label]), + list_option(predicateDisplay(PP), Options, [label]), + list_option(objectDisplay(OP), Options, [label]), + graph_unique_elements(Graph, Ss, Ps, Os, _), + iface_graph_add_properties(Ss, SP, [], EMap, QMap, Graph1), + iface_graph_add_properties(Ps, PP, Graph1, EMap, QMap, Graph2), + iface_graph_add_properties(Os, OP, Graph2, EMap, QMap, DisplayGraph). + +%% graphs_unique_elements(Graph, Ss, Ps, Os, SRCs) +% +% extract all unique resources from Graph. + +graphs_unique_elements(Graphs, Ss, Ps, Os, SRCs) :- + graphs_elements(Graphs, Ss0, [], Ps0, [], Os0, [], SRCs0, []), + sort(Ss0, Ss), + sort(Ps0, Ps), + sort(Os0, Os), + sort(SRCs0, SRCs). + +graph_unique_elements(Graph, Ss, Ps, Os, SRCs) :- + graph_elements(Graph, Ss0, [], Ps0, [], Os0, [], SRCs0, []), + sort(Ss0, Ss), + sort(Ps0, Ps), + sort(Os0, Os), + sort(SRCs0, SRCs). + + +graphs_elements([], Ss, Ss, Ps, Ps, Os, Os, SRCs, SRCs). +graphs_elements([_Type-Graph|Graphs], Ss0, Ss, Ps0, Ps, Os0, Os, SRCs0, SRCs) :- + graph_elements(Graph, Ss0, Ss1, Ps0, Ps1, Os0, Os1, SRCs0, SRCs1), + graphs_elements(Graphs, Ss1, Ss, Ps1, Ps, Os1, Os, SRCs1, SRCs). + + +graph_elements([], Ss, Ss, Ps, Ps, Os, Os, SRCs, SRCs). +graph_elements([Triple|T], [S|Ss0], Ss, [P|Ps0], Ps, Os1, Os, SRCs1, SRCs) :- + ( Triple = rdf(S,P,O,SRC:_) + -> SRCs1 = [SRC|SRCs0] + ; Triple = rdf(S,P,O,SRC) + -> SRCs1 = [SRC|SRCs0] + ; Triple = rdf(S,P,O) + -> SRCs1 = SRCs0 + ), + ( O = literal(_) + -> Os1 = Os0 + ; Os1 = [O|Os0] + ), + graph_elements(T, Ss0, Ss, Ps0, Ps, Os0, Os, SRCs0, SRCs). + + +%% list_option(?Opt, +Options, ?Default) +% +% Same as option/3, but uses Default in case Opt is an empty list. + +list_option(Opt, Options, _Default) :- + option(Opt, Options, []), + \+ arg(1, Opt, []), + !. +list_option(Opt, _Options, Default) :- + arg(1, Opt, Default). diff --git a/lib/cluster_search/iface_util.pl b/lib/cluster_search/iface_util.pl new file mode 100644 index 0000000..30c5990 --- /dev/null +++ b/lib/cluster_search/iface_util.pl @@ -0,0 +1,989 @@ +/* This file is part of ClioPatria. + + Author: + HTTP: http://e-culture.multimedian.nl/ + GITWEB: http://gollem.science.uva.nl/git/ClioPatria.git + GIT: git://gollem.science.uva.nl/home/git/ClioPatria.git + GIT: http://gollem.science.uva.nl/home/git/ClioPatria.git + Copyright: 2007, E-Culture/MultimediaN + + ClioPatria is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + ClioPatria is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with ClioPatria. If not, see <http://www.gnu.org/licenses/>. +*/ + +:- module(iface_util, + [ iface_resource_values/3, % +URI, +Properties, -Values + iface_resource_values/5, % +EMap, +QMap, +URI, +Properties, -Values + iface_resource_properties/3, % +URI, +Properties, -Pairs + iface_resource_properties/4, % +URI, +Properties, -Pairs, +Options + iface_key_resource_graph/4, % +URIList, +Pairs, -Graph, +Options + + iface_subject_graph/3, % +URI, -Graph, +Options + iface_object_graph/3, % +Resource, -Graph, +Options + iface_predicate_graph/3, % +URI, -Graph, +Options + + iface_graph_extend/5, % +URIs, +Ps, +GraphIn, -GraphOut, +Options + iface_graph_add_properties/6, % +URIs, +Ps, +GraphIn, +ExtMap, +QueryMap, -GraphOut + iface_language_filter/3, % +GraphIn, +LanguageList, -GraphOut + iface_used_namespaces/2, % +Graph, -Namespace + + iface_has/3, % ?Subject, ?Predicate, ?Object + iface_has/6, % +ExtMap, +QueryMap, ?Subject, ?Predicate, ?Object, -RealP + iface_pref_value/3, % ?Subject, -Predicate, -Object + iface_pref_value/6, % +ExtMap, +QueryMap, ?Subject, ?Predicate, ?Object, -RealP + iface_literal_value/3, % ?Subject, -Predicate, -Literal + iface_literal_value/6, % +ExtMap, +QueryMap, ?Subject, ?Predicate, ?Literal, -RealP + iface_pref_literal_value/3, % ?Subject, -Predicate, -Literal + iface_pref_literal_value/6, % +ExtMap, +QueryMap, ?Subject, ?Predicate, ?Literal, -RealP + + iface_label/2, % +Resource, -Label + iface_label/4, % +ExtMap, +QueryMap, +Resource, -Label + iface_match_label/3, % +Resource, +Match, -Label + iface_match_label/5, % +ExtMap, +QueryMap, +Resource, +Match, -Label + iface_class_of/2, % +Resource, -Class + iface_class_of/4, % +ExtMap, +QueryMap, +Resource, -Class + iface_symbol_of/2, % +URI, -Symbol + iface_thumbnail/2, % +Resource, -ThumbnailURL + iface_image/2, % +Resource, -ImageURL + iface_video/2, % +Resource, -VideoURL + iface_description/2, % +Resource, -Txt + iface_abbreviation/2, % +Resource, -Txt + iface_table_property/2, % +Class, -Property + iface_pref_property/2, % +Property, -Preferred + iface_abstract_predicate/2, % +Predicate, -Abstract + iface_abstract_class/2, % +Class, -Abstract + iface_concept/1 % ?Resource + ]). + + + +:- use_module(library(semweb/rdf_db)). +:- use_module(library(semweb/rdfs)). +:- use_module(library(semweb/rdf_label)). +:- use_module(library(http/http_dispatch)). +:- use_module(library(error)). +:- use_module(library(count)). +:- use_module(library(settings)). +:- use_module(rdfs_plus_skos). +:- use_module(api(media_caching)). + +:- rdf_register_prefix(iface, 'http://e-culture.multimedian.nl/ns/interface/'). + +:- rdf_meta + iface_subject_graph(r, -, +), + iface_object_graph(r, -, +), + iface_predicate_graph(r, -, +), + + iface_has(r,r,o), + iface_has(+,+,r,r,o,-), + iface_pref_value(r,r,-), + iface_pref_value(+,+,r,r,-,_), + iface_literal_value(r,r,o), + iface_literal_value(+,+,r,r,o,-), + iface_pref_literal_value(r,r,o), + iface_pref_literal_value(+,+,r,r,o,-), + + iface_resource_values(r,t,-), + iface_resource_values(+,+,r,t,-), + + iface_class_of(r,-), + iface_pref_property(r,r). + + +:- discontiguous + term_expansion/2. + +:- multifile + english_language_tag/1. + + +:- if(current_setting(user:lang)). +% do nothing +:- else. +:- setting(user:lang, atom, en, 'Preferred language'). +:- endif. + /****************************** + * value lists * + ******************************/ + +%% iface_resource_values(+URI, +Ps, -Values). +%% iface_resource_values(+URI, +Ps, -Values, +Options). +% +% Values contains the values of all properties in Ps for URI. +% By default rdf_has is used unless options are specified: + +iface_resource_values(URI, Ps0, Vs) :- + rdf_global_term(Ps0, Ps), + iface_resource_values_(Ps, 0, 0, URI, Vs). +iface_resource_values(EMap, QMap, URI, Ps0, Vs) :- + rdf_global_term(Ps0, Ps), + iface_resource_values_(Ps, EMap, QMap, URI, Vs). + +iface_resource_values_([], _, _, _, []) :- !. +iface_resource_values_([P|Ps], EMap, QMap, URI, [V|Vs]) :- + iface_has(EMap, QMap, URI, P, V, _), !, + iface_resource_values_(Ps, EMap, QMap, URI, Vs). +iface_resource_values_([_|Ps], EMap, QMap, URI, Vs) :- + iface_resource_values_(Ps, EMap, QMap, URI, Vs). + +%% iface_resource_properties(+URI, +Ps, -Pairs). +%% iface_resource_properties(+URI, +Ps, -Pairs, +Options). +% +% Pairs contains a key=value pair for each element from Ps. +% Ps is either a list of properties in which case key is a property, +% or Ps is a list of property-key pairs in which case property is used +% to get the value and key is returned with the value. +% +% @see iface_resource_values + +iface_resource_properties(R, Ps0, Pairs) :- + rdf_global_term(Ps0, Ps), + iface_resource_properties_(Ps, 0, 0, R, Pairs). + +iface_resource_properties(R, Ps0, Pairs, Options) :- + rdf_global_term(Ps0, Ps), + resource_ext_map(Options, EMap), + query_ext_map(Options, QMap), + iface_resource_properties_(Ps, EMap, QMap, R, Pairs). + +iface_resource_properties_([], _, _, _, []). +iface_resource_properties_([P0|Ps], EMap, QMap, R, [Key=V|Pairs]) :- + ( P0 = P-Key + -> iface_has(EMap, QMap, R, P, O, _) + ; iface_has(EMap, QMap, R, P0, O, _), + Key = P0 + ), + !, + object_value(O, V), + iface_resource_properties_(Ps, EMap, QMap, R, Pairs). +iface_resource_properties_([_|Ps], EMap, QMap, R, Pairs) :- + iface_resource_properties_(Ps, EMap, QMap, R, Pairs). + + +iface_key_resource_graph(Rs, Ps0, Graph, Options) :- + rdf_global_term(Ps0, Ps), + resource_ext_map(Options, EMap), + query_ext_map(Options, QMap), + findall(rdf(R,Key,V), + ( member(R, Rs), + member(P-Key, Ps), + iface_has(EMap, QMap, R,P,V, _) + ), + Graph + ). + +object_value(literal(L), Txt) :- !, + literal_text(L, Txt). +object_value(L, L) :- + is_list(L), !. +object_value(R, Txt) :- + rdf_subject(R), !, + iface_label(R, Txt). +object_value(R, R). + + /****************************** + * graphs * + ******************************/ + +%% iface_subject_graph(+URIs, -Graph, +Options) +% +% The "description graph" contains all triples/quads +% with URI as the subject. +% +% Options is a list of: +% +% * predicate(+Predicate) +% restrict to objects of Predicate (no subPropertyOf) +% +% * lens(+Lens) +% restrict to properties of Lens. +% if Lens==true a lens matching the resource type chosen. +% if Lens is an RDF lens this is used directly. +% +% * max(+Number) +% Maximum number of triples per property +% +% * src(+Boolean) +% Include src of the triples +% If true, Graph will contain quads +% +% * bnode_expand(+Boolean) +% Include triples of blank nodes +% +% * rdfs_plus_skos(+List) +% @see rdfs_plus_skos/6 + +iface_subject_graph(R, Graph, Options) :- + option(predicate(P), Options), + P \== [], + ( is_list(P) + -> Ps = P + ; nonvar(P) + -> Ps = [P] + ), !, + iface_subject_graph(R, Ps, Graph, Options). +iface_subject_graph(R, Graph, Options) :- + option(lens(Lens0), Options), + ( Lens0 == true + -> iface_resource_lens(R, Lens) + ; rdfs_individual_of(Lens0, iface:'Lens') + -> Lens = Lens0 + ), + iface_lens_properties(Lens, Ps), !, + iface_lens_options(Lens, Options, Options1), + iface_subject_graph(R, Ps, Graph, Options1). +iface_subject_graph(R, Graph, Options) :- + ( is_list(R) + -> Rs = R + ; Rs = [R] + ), + findall(P, (member(U,Rs),rdf(U,P,_)), Ps0), + sort(Ps0, Ps), + iface_subject_graph(R, Ps, Graph, Options). + +iface_subject_graph(R, Ps, Graph, Options) :- + ( is_list(R) + -> Rs = R + ; Rs = [R] + ), + option(rdfs_plus_skos(Reasoning), Options, []), + resource_ext_map(Reasoning, EMap), + query_ext_map(Reasoning, QMap), + option(src(Src), Options, false), + option(max(Max), Options, inf), + findall(Edge, + ( member(S,Rs), + member(P, Ps), + edges(EMap, QMap, Src, P, S, _, Max, Edges), + member(Edge0, Edges), + iface_edge_object_expand(EMap, Edge0, Edge) + ), + Graph0), + ( option(bnode_expand(true), Options) + -> expand_bnodes(Graph0, Graph, Options) + ; Graph = Graph0 + ). + +expand_bnodes(Graph, ExpandedGraph, Options) :- + findall(Bnode, + (member(rdf(_S,_P,Bnode,_G), Graph), + rdf_is_bnode(Bnode) + ), + Bnodes), + (Bnodes = [] + -> ExpandedGraph = Graph + ; iface_subject_graph(Bnodes, Graph0, Options), + expand_bnodes(Graph0, ExpandedBnodes, Options), + append(Graph, ExpandedBnodes,ExpandedGraph) + ), + + true. + +%% iface_object_graph(+URIs, -Graph, +Options) +% +% Graph contains all triples with URI as an object. +% +% Options see iface_subject_graph/3. + +iface_object_graph(R, Graph, Options) :- + option(predicate(P), Options, _), + P \== [], + ( is_list(P) + -> Ps = P + ; nonvar(P) + -> Ps = [P] + ), + iface_object_graph(R, Ps, Graph, Options). +iface_object_graph(R, Graph, Options) :- + ( is_list(R) + -> Rs = R + ; Rs = [R] + ), + findall(P, (member(U,Rs),rdf(_,P,U)), Ps0), + sort(Ps0, Ps), + iface_object_graph(R, Ps, Graph, Options). + +iface_object_graph(R, Ps, Graph, Options) :- + resource_ext_map(Options, EMap), + query_ext_map(Options, QMap), + option(max(Max), Options, inf), + option(src(Src), Options, false), + findall(Edge, + ( member(P, Ps), + edges(EMap, QMap, Src, P, _, R, Max, Edges), + member(Edge, Edges) + ), + Graph). + +%% iface_predicate_graph(+URI, -Graph, +Options) +% +% Graph contains all triples with URI as an object. +% +% Options see iface_subject_graph/4. + +iface_predicate_graph(P, Graph, Options) :- + resource_ext_map(Options, EMap), + query_ext_map(Options, QMap), + option(max(Max), Options, inf), + option(src(Src), Options, false), + edges(EMap, QMap, Src, P, _, _, Max, Graph). + +%% iface_graph_add_properties(+GraphIn, +Rs, +Ps, -GraphOut, +Options) +% +% Extend graph with new triples about Rs according to Ps. + +iface_graph_extend(GraphIn, Rs, Ps, GraphOut, Options) :- + resource_ext_map(Options, EMap), + query_ext_map(Options, QMap), + iface_graph_add_properties(Rs, Ps, GraphIn, EMap, QMap, GraphOut). + +iface_graph_add_properties([], _, Graph, _, _, Graph) :- !. +iface_graph_add_properties([S|Ss], Ps, Graph0, EMap, QMap, Graph) :- + atom(S), !, + iface_add_properties(Ps, S, Graph0, EMap, QMap, Graph1), + iface_graph_add_properties(Ss, Ps, Graph1, EMap, QMap, Graph). +iface_graph_add_properties([_|Ss], Ps, Graph0, EMap, QMap, Graph) :- + iface_graph_add_properties(Ss, Ps, Graph0, EMap, QMap, Graph). + +iface_add_properties([], _, Graph, _, _, Graph) :- !. +iface_add_properties([P|Ps], S, Graph0, EMap, QMap, Graph) :- + iface_has(EMap, QMap, S, P, V, RealP), !, + iface_add_properties(Ps, S, [rdf(S,RealP,V)|Graph0], EMap, QMap, Graph). +iface_add_properties([_|Ps], S, Graph0, EMap, QMap, Graph) :- + iface_add_properties(Ps, S, Graph0, EMap, QMap, Graph). + + /****************************** + * edges * + ******************************/ + +%% edges(+EMap, +QMap, +GetSrc, +P, ?S, ?O, +Max,-Edges) +% +% Edges contains all edges over P with a maximum of +% Max. + +edges(EMap, QMap, Src, P, S, O, Max, Edges) :- + answer_count(Edge, iface_edge(EMap,QMap,Src,P,S,O,Edge), Max, Edges). + +%% iface_edge(+EMap, +QMap, +SrcBool, +P, ?S, ?O, -Edge) +% +% Edge is a triple or quad unifying S,P,O with a triple in the graph. +% +% * EMap and QMap indicate the rdfs_plus_skos reasoning @see +% rdfs_plus_skos/5 + +iface_edge(EMap, QMap, SrcBool, P, S, O, Edge) :- + rdfs_plus_skos(EMap, QMap, S, P, O, _,RealP,_), + ( SrcBool, + rdf(S,RealP,O,Src) + -> Edge = rdf(S,P,O,Src) + ; Edge = rdf(S,P,O) + ). + +%% iface_edge_object_expand(+ExtMap, +EdgeIn, -EdgeOut) +% +% Edge is rdf(S,P,O) or an expandsion thereof. +% +% Currently we only expand if there is no triple source. + +iface_edge_object_expand(EMap, rdf(S,P,O0), rdf(S,P,O)) :- !, + representative(EMap, O0, O1), + ( % @TBD test for rdf_value + rdf_has(O1,rdf:value,O) + -> true + ; O = O1 + ). + +iface_edge_object_expand(_, Edge, Edge). + + /****************************** + * rdf graph operations * + ******************************/ + +%% iface_language_filter(+GraphIn, +LanguageList, -GraphOut) +% +% GraphOut contains all triples except those with a +% language not in LanguageList. +% +% Do we want to filter out literals without a language tag ? + +iface_language_filter([], _, []). +iface_language_filter([Triple|Ts], LangList, Rest) :- + literal_language(Triple, L), + \+ memberchk(L, LangList), !, + iface_language_filter(Ts, LangList, Rest). +iface_language_filter([Triple|Ts], LangList, [Triple|Rest]) :- + iface_language_filter(Ts, LangList, Rest). + +literal_language(rdf(_,_,literal(lang(L0, _))), L) :- + sub_atom(L0,0,2,_,L). +literal_language(rdf(_,_,literal(lang(L0, _)),_), L) :- + sub_atom(L0,0,2,_,L). + +%% iface_used_namespaces(+Graph, -Namespaces) +% +% Namespaces is a list of unique namespaces used in Graph. + +iface_used_namespaces(Graph, List) :- + findall(Ns, + ( rdf_triple(Graph, S,P,O), + triple_ns(S,P,O, Ns) + ), + List0), + sort(List0, List). +triple_ns(R,_,_,Ns) :- + \+ rdf_is_bnode(R), + rdf_url_namespace(R, Ns). +triple_ns(_,R,_,Ns) :- + \+ rdf_is_bnode(R), + rdf_url_namespace(R, Ns). +triple_ns(_,_,R,Ns) :- + atom(R), + \+ rdf_is_bnode(R), + rdf_url_namespace(R, Ns). + +rdf_triple(Graph, S,P,O) :- + member(rdf(S, P, O), Graph). +rdf_triple(Graph, S,P,O) :- + member(rdf(S, P, O,_), Graph). + + + /****************************** + * rdf_has variants * + ******************************/ + +%% iface_has(?S, ?P, ?O). +%% iface_has(+EMap, QMap, ?S, ?P, ?O, ?RealP). +% +% As rdfs_plus_skos/5, but also try interface:edge/5 + +iface_has(S,P,O) :- + iface_has(0, 0, S, P, O, _). + +iface_has(EMap, QMap, S, P, O, RealP) :- + ( var(P) -> true; atom(P) ), + rdfs_plus_skos(EMap, QMap, S, P, O, _, RealP, _). +iface_has(EMap, QMap, S, P, O, P) :- + interface:edge(P, EMap, QMap, S, O). + +%% iface_pref_value(?S, ?P, ?O). +%% iface_pref_value(+EMap, +QMap, ?S, ?P, ?O, -RealP) +% +% As rdfs_plus_skos/5, but first tries a preferred subproperty. +% +% @TBD exlcude preferred property values in second clause + +iface_pref_value(S, P, V) :- + iface_pref_value(0, 0, S, P, V, _). + +iface_pref_value(EMap, QMap, S, P, O, RealP) :- + iface_pref_property(P, PrefP), + rdfs_plus_skos(EMap, QMap, S, PrefP, O, _,RealP,_). +iface_pref_value(EMap, QMap, S, P, O, RealP) :- + rdfs_plus_skos(EMap, QMap, S, P, O, _,RealP,_). + +%% iface_literal_value(?S, ?P, ?Literal). +%% iface_literal_value(+EMap, +QMap, ?S, ?P, ?Literal, ?RealP). +% +% Succeeds if the triple Subject,Predicate,Literal exists, +% but first tries +% +% 1. literals in user language +% 2. literals in english +% 3. any language + +iface_literal_value(S, P, L) :- + iface_literal_value(0, 0, S, P, L, _). + +iface_literal_value(EMap, QMap, S, P, literal(lang(Lang, L)), RealP) :- + setting(user:lang, Lang), + rdfs_plus_skos(EMap, QMap, S, P, literal(lang(Lang, L)), _,RealP,_). +iface_literal_value(EMap, QMap, S, P, literal(lang(En, L)), RealP) :- + english_language_tag(En), + rdfs_plus_skos(EMap, QMap, S, P, literal(lang(En, L)), _,RealP,_). +iface_literal_value(EMap, QMap, S, P, literal(L), RealP) :- + rdfs_plus_skos(EMap, QMap, S, P, literal(L), _,RealP,_). + +%% iface_pref_literal_value(?S, ?P, ?Literal). +%% iface_pref_literal_value(+EMap, +QMap, ?Subject, ?Predicate, ?Literal, ?RealP). +% +% Succeeds if the triple Subject,Predicate,Literal exists, +% but first tries +% +% preferred property with user language +% any property with user language +% preferred property with english +% any property with english +% preferred property with any language +% any property with any language + +iface_pref_literal_value(S, P, L) :- + setting(user:lang, Lang), + iface_pref_literal_value(0, 0, Lang, S, P, L, _). + +iface_pref_literal_value(EMap, QMap, S, P, L, RealP) :- + setting(user:lang, Lang), + iface_pref_literal_value(EMap, QMap, Lang, S, P, L, RealP). + + +iface_pref_literal_value(EMap, QMap, Lang, S, P, literal(lang(Lang, L)), RealP) :- + iface_pref_property(P, PrefP), + rdfs_plus_skos(EMap, QMap, S, PrefP, literal(lang(Lang, L)), _,RealP,_). +iface_pref_literal_value(EMap, QMap, Lang, S, P, literal(lang(Lang, L)), RealP) :- + rdfs_plus_skos(EMap, QMap, S, P, literal(lang(Lang, L)), _,RealP,_). +iface_pref_literal_value(EMap, QMap, Lang, S, P, literal(lang(En, L)), RealP) :- + english_language_tag(En), + Lang \== En, + ( iface_pref_property(P, PrefP), + rdfs_plus_skos(EMap, QMap, S, PrefP, literal(lang(En, L)), _,RealP,_) + ; rdfs_plus_skos(EMap, QMap, S, P, literal(lang(En, L)), _,RealP,_) + ). +iface_pref_literal_value(EMap, QMap, _Lang, S, P, literal(L), RealP) :- + iface_pref_property(P, PrefP), + rdfs_plus_skos(EMap, QMap, S, PrefP, literal(L), _,RealP,_). +iface_pref_literal_value(EMap, QMap, _Lang, S, P, literal(L), RealP) :- + rdfs_plus_skos(EMap, QMap, S, P, literal(L), _,RealP,_). + + + /****************************** + * interface properties * + ******************************/ + +%% iface_resource_lens(+R, ?Lens) +% +% Lens applies to R. + +iface_resource_lens(R, Lens) :- + rdfs_individual_of(R, Class), + rdf(Lens, iface:lensType, Class). + +%% iface_lens_properties(+Lens, -Properties) +% +% Properties is a list of lensProperties accociated with Lens. + +iface_lens_properties(Lens, Ps) :- + rdf(Lens, iface:lensProperty, List), + rdfs_list_to_prolog_list(List, Ps), + !. +iface_lens_properties(Lens, Ps) :- + findall(P, rdf(Lens, iface:lensProperty, P), Ps), + Ps \== []. + +%% iface_lens_options(Lens, Options0, Options) + +iface_lens_options(Lens, Options0, Options) :- + findall(Q, rdf(Lens, iface:reasoning, literal(Q)), QL), + QL \== [], !, + ( select_option(rdfs_plus_skos(Q0), Options0, Options1) + -> union(QL, Q0, Q), + Options = [rdfs_plus_skos(Q)|Options1] + ; Options = [rdfs_plus_skos(QL)|Options0] + ). +iface_lens_options(_, Options, Options). + + + +%% iface_label(+Resource, -Label). +%% iface_label(+EMap, +QMap, +Resource, -Label) +% +% Label is the the prefered display label of Resource + +iface_label(R, Label) :- + iface_label(0, 0, R, L), + literal_text(L, Label). + +iface_label(_, _, literal(L), literal(L)) :- !. +iface_label(EMap, QMap, R, Label) :- + setting(user:lang, Lang), + iface_pref_lang_label(EMap, QMap, R, Lang, Label), + !. +iface_label(EMap, QMap, R, Label) :- + english_language_tag(En), + iface_pref_lang_label(EMap, QMap, R, En, Label), + !. +iface_label(EMap, QMap, R, Label) :- + ( rdfs_plus_skos(EMap, QMap, R, iface:prefLabel, O) + -> true + ; rdfs_plus_skos(EMap, QMap, R, rdfs:label, O) + ), + iface_label(EMap, QMap, O, Label), + !. +iface_label(EMap, QMap, R, Label) :- + rdf(R, rdf:value, Value), + iface_label(EMap, QMap, Value, Label), + !. +iface_label(_, _, R, Label) :- + atom(R), + rdf_split_url(_, Label1, R), + ( Label1 = '' + -> Label=R + ; Label=Label1 + ). + +iface_pref_lang_label(EMap, QMap, R, Lang, L) :- + ( rdfs_plus_skos(EMap, QMap, R, iface:prefLabel, literal(lang(Lang,L))) + -> true + ; rdfs_plus_skos(EMap, QMap, R, rdfs:label, literal(lang(Lang,L))) + ). + +%% iface_match_label(+R, +Match, -Label). +%% iface_match_label(+EMap, +QMap, +R, +Match, -Label). +% +% Label is a label of R and matches with Kwd. + +iface_match_label(R, Match, Label) :- + iface_match_label(0, 0, R, Match, Label). + +iface_match_label(EMap, QMap, R, Match, Label) :- + concat_atom(['*',Match,'*'], RegExp), + label_property(P), + iface_has(EMap, QMap, R, P, literal(like(RegExp), Lit), _), + !, + literal_text(Lit, Label). +iface_match_label(EMap, QMap, R, _Query, Label) :- + iface_label(EMap, QMap, R, Label). + +%% iface_match_literal(+R, +Match, -Txt). +%% iface_match_literal(+EMap, +QMap, +R, +Match, -Txt). +% +% Txt is a literal metadata property of R and matches with Kwd. + +iface_match_literal(R, Match, Label) :- + iface_match_literal(0, 0, R, Match, Label). + +iface_match_literal(EMap, QMap, R, Match, Label) :- + concat_atom(['*',Match,'*'], RegExp), + iface_has(EMap, QMap, R, _, literal(like(RegExp), Lit), _), + literal_text(Lit, Label). + +rdf_label:label_property(P) :- + rdf_equal(iface:prefLabel, P). +rdf_label:label_property(P) :- + rdf_equal(rdfs:label, P). + + +%% iface_class_of(+Resource, ?Class). +%% iface_class_of(+Type, +Resource, -Class). +% +% Class is the interface class of Resource. + +iface_class_of(Resource, Class) :- + iface_class_of(0, 0, Resource, Class). + +iface_class_of(_, _, Resource, _) :- + var(Resource), !, + instantiation_error(Resource). +iface_class_of(_, _, literal(_), C) :- + rdf_equal(C, rdfs:'Literal'), + !. +iface_class_of(EMap, QMap, Resource, Class) :- + rdfs_plus_skos(EMap, QMap, Resource, rdf:type, Class0), + rdf_reachable(Class0, rdfs:subClassOf, Class), + rdf(Class, rdf:type, iface:'Class'), + !. +iface_class_of(_, _, _Resource, RestClass) :- + rdf_equal(iface:'RestClass', RestClass). + + +%% iface_symbol_of(+Resource, -Symbol). +%% iface_symbol_of(+EMap, +QMap, +Resource, -Symbol). +% +% Symbol is the URI of an image that depicts Resource. +% Uses iface:depictedBy or the inverse iface:depicts. +% Add Mappings to these relations to add more properties. + +iface_symbol_of(Resource, Symbol) :- + iface_symbol_of(0, 0, Resource, Symbol). + +iface_symbol_of(EMap, QMap, Resource, Symbol) :- + rdfs_plus_skos(EMap, QMap, Resource, iface:prefDepictedBy, Symbol). +iface_symbol_of(EMap, QMap, Resource, Symbol) :- + rdfs_plus_skos(EMap, QMap, Symbol, iface:prefDepicts, Resource). +iface_symbol_of(EMap, QMap, Resource, Symbol) :- + rdfs_plus_skos(EMap, QMap, Resource, iface:depictedBy, Symbol). +iface_symbol_of(EMap, QMap, Resource, Symbol) :- + rdfs_plus_skos(EMap, QMap, Symbol, iface:depicts, Resource). + +%% is_image(+Resource) +% +% Resource is of type Image. + +is_image(Resource) :- + rdfs_individual_of(Resource, iface:'Image'), !. + +%% is_video(+Resource) +% +% Resource is of type Video. + +is_video(Resource) :- + rdfs_individual_of(Resource, iface:'Video'), !. + +%% iface_image(+Resource, -Image) +% +% Same as iface_thumbnail with Image at full size + +iface_image(R, Image) :- + iface_image(0, 0, R, Image). +iface_image(_, _, Resource, Image) :- + is_image(Resource), !, + image_url(Resource, Image). +iface_image(EMap, QMap, Resource, Image) :- + iface_symbol_of(EMap, QMap, Resource, Symbol), + \+ is_video(Symbol), + image_url(Symbol, Image). + +%% iface_thumbnail(+Resource, -Thumbnail) +% +% Thumbnail is the url of an image depicting Resource. + +iface_thumbnail(R, Thumbnail) :- + iface_thumbnail(0, 0, R, Thumbnail). + +iface_thumbnail(_, _, Resource, Thumbnail) :- + is_image(Resource), !, + thumbnail_url(Resource, Thumbnail). +iface_thumbnail(EMap, QMap, Resource, Thumbnail) :- + iface_symbol_of(EMap, QMap, Resource, Symbol), + \+ is_video(Symbol), + thumbnail_url(Symbol, Thumbnail). + +%% iface_video(+Resource, -Video) +% +% Video is about Resource. + +iface_video(R, Video) :- + iface_video(0, 0, R, Video). + +iface_video(_, _, Video, Video) :- + is_video(Video), !. +iface_video(EMap, QMap, Resource, Video) :- + iface_symbol_of(EMap, QMap, Resource, Video), + is_video(Video). + +%% iface_description(+Resource, -Description) +% +% Description is a literal description of Resource +% Uses iface:description Add Mappings to this relation to use +% domain specific property. + +iface_description(R, Description) :- + iface_description(0, 0, R, Description). +iface_description(EMap, QMap, Resource, Description) :- + iface_pref_literal_value(EMap, QMap, Resource, iface:description, Description, _). + +%% iface_abbreviation(+Resource, -Txt) +% +% Show short label of Resource. + +iface_abbreviation(Resource, Abbreviation) :- + iface_abbreviation(0, 0, Resource, Abbreviation). +iface_abbreviation(EMap, QMap, Resource, Abbreviation) :- + iface_pref_literal_value(EMap, QMap, Resource, iface:abbreviation, Abbreviation, _). + +%% iface_pref_property(+Property, -Preferred) +% +% Succeeds if Preferred is defined as a preferred property of +% Property. + +iface_pref_property(P, Preferred) :- + rdf(P, iface:hasPreferred, Preferred), + !. +iface_pref_property(P, Preferred) :- + rdf(Preferred, iface:preferredOf, P). + +%% iface_table_property(+Class, -Property) +% +% Property is defined as iface:tableProperty for Class. + +iface_table_property(Class, Property) :- + rdfs_subclass_of(SC, Class), + rdf(SC, iface:tableProperty, Property). + +%% iface_abstract_property(+Property, -Abstract) is det. +% +% True if Abstract is a high-level property for Property. + +iface_abstract_predicate(P, AP) :- + atom(P), + rdf_reachable(P, rdfs:subPropertyOf, AP), + iface_predicate(AP), !. +iface_abstract_predicate(P, P). + +%iface_predicate(P) :- +% catch(cliopatria:iface_predicate(P), _, fail), !. +iface_predicate(P) :- + rdfs_individual_of(P, iface:'Property'). + + +%% iface_abstract_class(+Class, -Abstract) is det. +% +% True if Abstract is a high-level class for Class. + +iface_abstract_class(Resource, Class) :- + class_of(Resource, Class0), + abstract_class(Class0, Class). + +abstract_class(Class, AClass) :- + rdfs_subclass_of(Class, AClass), + iface_class(AClass), + Class \== AClass. +abstract_class(Class, Class). + +%% class_of(+ResourceOrLiteral, -Class:atom) is det. +% +% Class is the class of ResourceOrLiteral. Returns rdfs:Literal if +% ResourceOrLiteral is a literal and rdfs:Resource if +% ResourceOrLiteral has no explicit class. +% +% Added by MH +% The preferred class is an interface class (iface:Class) + +class_of(literal(_), C) :- !, + rdf_equal(C, rdfs:'Literal'). +class_of(Resource, Class) :- + rdf(Resource, rdf:type, Class0), + rdf_reachable(Class0, rdfs:subClassOf, Class), + rdf(Class, rdf:type, iface:'Class'), + !. +class_of(O, C) :- + rdf_has(O, rdf:type, C), !. +class_of(_, Resource) :- + rdf_equal(Resource, rdfs:'Resource'). + +%iface_class(Class) :- +% catch(cliopatria:iface_class(Class), _, fail), !. +iface_class(Class) :- + rdfs_individual_of(Class, iface:'Class'). + + +%% iface_concept(+Resource) is semidet. +% +% True if Resource is a concept from some thesaurus. If the +% meta-modelling is correct, using skos:Concept should suffice +% here. This is used by concept_of/2. + +iface_concept(Resource) :- + atom(Resource), + rdfs_individual_of(Resource, skos:'Concept'), + %cliopatria:is_concept(Resource), + !. + + + /****************************** + * predefined edges * + ******************************/ + +:- setting(result:sublabel, uri, dc:creator, 'RDF property used for sublabel'). +:- setting(result:format, oneof([thumbnail,snippet]), thumbnail, 'Format of result'). + +:- multifile + interface:edge/5. + +:- rdf_meta + edge(r,+,o), + edge(+,+,r,+,o). + +interface:edge(S, P, O) :- + interface:edge(0, 0, S, P, O). + +interface:edge(label, EMap, QMap, S, O) :- + iface_label(EMap, QMap, S, O). +interface:edge(matchLabel(Kwd), EMap, QMap, S, O) :- + iface_match_label(EMap, QMap, S, Kwd, O). +interface:edge(matchLiteral(Kwd), EMap, QMap, S, O) :- + iface_match_literal(EMap, QMap, S, Kwd, O). +interface:edge(sublabel, EMap, QMap, S, O) :- + setting(result:sublabel, Property), + rdfs_plus_skos(EMap, QMap, S, Property, R), + interface:edge(label, EMap, QMap, R, O). +interface:edge(labels, EMap, QMap, S, Os) :- + findall(O, rdfs_plus_skos(EMap, QMap, S, rdfs:label, O), Os). +interface:edge(prefLabel, EMap, QMap, S, O) :- + iface_literal_value(EMap, QMap, S, iface:prefLabel, O, _). +interface:edge(altLabel, EMap, QMap, S, O) :- + iface_literal_value(EMap, QMap, S, iface:altLabel, O, _). +interface:edge(altLabels, EMap, QMap, S, Os) :- + findall(O, rdfs_plus_skos(EMap, QMap, S, iface:altLabel, O), Os). +interface:edge(description, EMap, QMap, S, O) :- + iface_pref_literal_value(EMap, QMap, S, iface:description, O, _). +interface:edge(abbreviation, EMap, QMap, S, O) :- + rdfs_plus_skos(EMap, QMap, S, iface:abbreviation, O). +interface:edge(alias, _, _, S, literal(Alias)) :- + rdf_global_id(Alias:_Local, S). +interface:edge(ns, _, _, URI, literal(Ns)) :- + rdf_url_namespace(URI, Ns). +interface:edge(registered_ns, _, _, URI, literal(Ns)) :- + atom(URI), + rdf_db:ns(_,Ns), + sub_atom(URI, _,_,_, Ns),!. +interface:edge(image, EMap, QMap, S, Image) :- + iface_image(EMap, QMap, S, Image). +interface:edge(images, EMap, QMap, S, Images) :- + findall(I, iface_image(EMap, QMap, S, I), Images). +interface:edge(thumbnail, EMap, QMap, S, Image) :- + iface_thumbnail(EMap, QMap, S, Image). +interface:edge(thumbnails, EMap, QMap, S, Images) :- + findall(I, iface_thumbnail(EMap, QMap, S, I), Images). +interface:edge(video, EMap, QMap, S, Video) :- + iface_video(EMap, QMap, S, Video). +interface:edge(videos, EMap, QMap, S, Videos) :- + findall(V, iface_video(EMap, QMap, S, V), Videos). +interface:edge(iclass, EMap, QMap, S, Class) :- + iface_class_of(EMap, QMap, S, Class). +interface:edge(lat, EMap, QMap, S, O) :- + rdfs_plus_skos(EMap, QMap, S, iface:latitude, O). +interface:edge(lng, EMap, QMap, S, O) :- + rdfs_plus_skos(EMap, QMap, S, iface:longitude, O). +interface:edge(date, EMap, QMap, S, Date) :- + ( rdfs_plus_skos(EMap, QMap, S, iface:startDate, Start) + -> ( rdfs_plus_skos(EMap, QMap, S, iface:endDate, End) + -> Date = date(Start, End) + ; Date = date(Start) + ) + ; rdfs_plus_skos(EMap, QMap, S, iface:date, Date) + ). +interface:edge(broader, EMap, QMap, S, O) :- + rdfs_plus_skos(EMap, QMap, S, iface:broader, O). +interface:edge(root, EMap, QMap, S, O) :- + rdfs_plus_skos(EMap, QMap, S, iface:broader, Parent), + rdf_reachable(Parent, iface:broader, O), + \+ rdf_has(O, iface:broader, _). +interface:edge(inlink, _, _, S, Score) :- + rdf_estimate_complexity(_,_,S,CO), + Score is 1/(1+CO). +interface:edge(outlink, _, _, S, Score) :- + rdf_estimate_complexity(S,_,_,CS), + Score is 1/(1+CS). +interface:edge(links, _, _, S, Score) :- + rdf_estimate_complexity(_,_,S,CO), + rdf_estimate_complexity(S,_,_,CS), + Score is 1/(1+CO+CS). + +interface:edge(example_thumbs, EMap, QMap, S, Thumbs) :- + answer_count(Thumb, + ( rdf(Resource, _, S), + iface_thumbnail(EMap, QMap, Resource, Thumb) + ), + 5, + Thumbs + ). + +%% thumbnail_url(+Image, -Thumbnail) +% +% Thumbnail is the URI of a Thumbnail +% that depicts Resource. + +thumbnail_url(Image, Thumbnail) :- + http_link_to_id(http_thumbnail, Thumbnail, [uri(Image)]). + +image_url(Image, Thumbnail) :- + http_link_to_id(http_mediumscale, Thumbnail, [uri(Image)]). diff --git a/lib/cluster_search/json_graph.pl b/lib/cluster_search/json_graph.pl new file mode 100644 index 0000000..edf296e --- /dev/null +++ b/lib/cluster_search/json_graph.pl @@ -0,0 +1,343 @@ +/* This file is part of ClioPatria. + + Author: + HTTP: http://e-culture.multimedian.nl/ + GITWEB: http://gollem.science.uva.nl/git/ClioPatria.git + GIT: git://gollem.science.uva.nl/home/git/ClioPatria.git + GIT: http://gollem.science.uva.nl/home/git/ClioPatria.git + Copyright: 2007, E-Culture/MultimediaN + + ClioPatria is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + ClioPatria is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with ClioPatria. If not, see <http://www.gnu.org/licenses/>. +*/ + +:- module(json_graph, + [ graph_to_json/2, % +Graph:[rdf(s,p,v)], -JSONTerm + graph_to_json/3, % +Type, +Graph:[rdf(s,p,v)], -JSONTerm + graph_to_json/4, % +Type, +Graph:[rdf(s,p,v)], -JSONTerm, +Options + triples_to_json/4, % +Type, +Graph:[rdf(s,p,v)], -JSONTerm, +Options + rdf_group_by/3, % +Type, +Graph:[rdf(s,p,v)], -GroupedGraph + rdf_resource_to_json/2 % +Resource, -JSONTerm + ]). + +% http +:- use_module(library(http/json)). +:- use_module(library(http/json_convert)). +:- use_module(library(http/http_json)). + +% semweb +:- use_module(library(semweb/rdf_label)). + +% util +:- use_module(iface_util). + +%% graph_to_json(+Type, +Graph, -JSON, +Options) +% +% JSON is a prolog json term of Graph. +% Based on "http://n2.talis.com/wiki/RDF_JSON_Specification". +% +% Usage reply_json(JSON) to write JSON to stream. +% +% @Param Type triples={subject, predicate, object} +% spo={subject:predicate:[object]} +% ops={object:predicate:[subject]} +% etc. + +graph_to_json(Graph, JSON) :- + graph_to_json(spo, Graph, JSON, []). + +graph_to_json(Type, Graph, JSON) :- + graph_to_json(Type, Graph, JSON, []). + +graph_to_json(triples, Graph, JSON, Options) :- !, + triples_to_json(spo, Graph, JSON, Options). + +graph_to_json(Type0, Graph, json(JSON), Options) :- + output_type(Type0, Type), + option(smax(SMax), Options, -1), + option(pmax(PMax), Options, -1), + option(omax(OMax), Options, -1), + ( option(prefix(true), Options) + -> JSON = [prefixes=Prefixes|JSONGraph], + graph_prefixes_to_json(Graph, Prefixes) + ; JSON = JSONGraph + ), + graph_to_json(Type, Graph, SMax, PMax, OMax, JSONGraph). + +output_type(display, spo) :- !. +output_type(subject, spo) :- !. +output_type(predicate, pso) :- !. +output_type(object, ops) :- !. +output_type(Type, Type). + + +%% graph_to_json(+IndexType, +Graph, +Max1, +Max2, +Max3, -JSON) +% +% JSON is a json term of graph indexed according to IndexType. + +graph_to_json(spo, Graph, SMax, PMax, OMax, JSON) :- !, + indexed_triples_to_json(1,2,3, Graph, SMax, PMax, OMax, JSON). + +graph_to_json(ops, Graph, SMax, PMax, OMax, JSON) :- !, + indexed_triples_to_json(3,2,1, Graph, OMax, PMax, SMax, JSON). + +graph_to_json(pso, Graph, SMax, PMax, OMax, JSON) :- !, + indexed_triples_to_json(2,1,3, Graph, PMax, SMax, OMax, JSON). + +graph_to_json(pos, Graph, SMax, PMax, OMax, JSON) :- !, + indexed_triples_to_json(2,3,1, Graph, PMax, OMax, SMax, JSON). + +graph_to_json(sop, Graph, SMax, PMax, OMax, JSON) :- !, + indexed_triples_to_json(1,3,2, Graph, SMax, OMax, PMax, JSON). + +graph_to_json(osp, Graph, SMax, PMax, OMax, JSON) :- !, + indexed_triples_to_json(3,1,2, Graph, OMax, SMax, PMax, JSON). + +graph_to_json(Type, _, _, _, _, _) :- + domain_error('output type', Type). + + +%% triples_to_json(+Graph, -JSON, +Options) +% +% JSON is a RDF graph in prolog json term notation. +% use json_reply to output. + +triples_to_json(Type, Graph, JSON, Options) :- + option(source(Source), Options, false), + option(subject_info(SP), Options, []), + option(object_info(OP), Options, []), + option(predicate_info(PP), Options, []), + triples_to_json(Graph, Type, Source, SP,PP,OP, JSON). + +triples_to_json([], _, _, _, _, _, []). +triples_to_json([Triple|T], Type, Source, SP,PP,OP, [json(JSON)|Rest]) :- + ( Triple = rdf(S,P,O,Src), + Source = true + -> ( Src = File:Lineno + -> rdf_resource_to_json(File,SrcList), + JSON = [source=json(SrcList), + lineno=Lineno| + JSON0 + ] + ; rdf_resource_to_json(Src,SrcList), + JSON = [source=json(SrcList)| + JSON0 + ] + ) + ; Triple = rdf(S,P,O,_) + -> JSON = JSON0 + ; Triple = rdf(S,P,O) + -> JSON = JSON0 + ), + triple_to_json(Type, S,P,O, SP,PP,OP, JSON0), + triples_to_json(T, Type, Source, SP,PP,OP, Rest). + + +%% triple_to_json(+Type,+Subject,+Predicate,+Object) +% +% JSONList contains json terms for S,P and O. + +triple_to_json(spo, S,P,O, SP,PP,OP, JSONList) :- + JSONList = [subject=json(SList), + predicate=json(PList), + object=json(OList)], + rdf_resource_to_json(S, SP, SList), + rdf_resource_to_json(P, PP, PList), + rdf_resource_to_json(O, OP, OList). +triple_to_json(ps, S,P,_, SP,PP,_, JSONList) :- + JSONList = [subject=json(SList), + predicate=json(PList)], + rdf_resource_to_json(S, SP, SList), + rdf_resource_to_json(P, PP, PList). +triple_to_json(po, _,P,O, _,PP,OP, JSONList) :- + JSONList = [object=json(OList), + predicate=json(PList)], + rdf_resource_to_json(O, OP, OList), + rdf_resource_to_json(P, PP, PList). +triple_to_json(so, S,_,O, SP,_,OP, JSONList) :- + JSONList = [subject=json(SList), + object=json(OList)], + rdf_resource_to_json(S, SP, SList), + rdf_resource_to_json(O, OP, OList). + + +% % indexed_triples_to_json(+Arg1, +Arg2, +Arg3, +Graph, +Max1, +Max2, +Max3, -JSON) +% +% JSON is a prolog term in json notation indexed first by the Arg1 +% argument, then Arg2 argument. + +indexed_triples_to_json(Arg1, Arg2, Arg3, Graph, Max1, Max2, Max3, JSON) :- + rdf_group_by(Arg1, Graph, Pairs), + pairs_to_json(Arg2, Arg3, Pairs, Max1, Max2, Max3, JSON). + + +% % pairs_to_json(+Arg2, +Arg3, +Pairs:arg1-[triple], +Max, +Max2, +Max3, -JSON) +% +% Convert triples grouped by Key to a JSON term. +% +% @Param Max maximum Arg1 items that are shown. +% @param Max2 maximum Arg2 items that are shown. +% @param Max3 maximum Arg3 items that are shown. + +pairs_to_json(_, _, [], _, _, _, []) :- !. +pairs_to_json(_, _, Rest, 0, _, _, [Msg]) :- !, + rest_to_json(Rest, Msg). +pairs_to_json(Arg2, Arg3, [R0-Triples|T], N, Max2, Max3, [R=json(JSON)|Rest]) :- + N1 is N - 1, + resource_to_value(R0, R), % if R0 is a literal we loose information ! + rdf_group_by(Arg2, Triples, Pairs), + pairs_to_json(Arg3, Pairs, Max2, Max3, JSON), + pairs_to_json(Arg2, Arg3, T, N1, Max2, Max3, Rest). + + +% % pairs_to_json(+Arg3, +Pairs:arg2-[triple], +Max2, +Max3, -JSON) +% +% Convert triples grouped by arg2 to a JSON. + +pairs_to_json(_, [], _, _, []) :- !. +pairs_to_json(_, Rest, 0, _, [Msg]) :- !, + rest_to_json(Rest, Msg). +pairs_to_json(Arg3, [R0-Triples|T], N, Max3, [R=JSON|Rest]) :- + N1 is N - 1, + resource_to_value(R0, R), + resources_to_json(Arg3, Triples, Max3, JSON), + pairs_to_json(Arg3, T, N1, Max3, Rest). + + +%% resources_to_json(+Nth, +Triples, +Max, -JSON) +% +% Convert all the Nth arguments from Triple to a JSON term. + +resources_to_json(_, [], _, []) :- !. +resources_to_json(_, Rest, 0, [Msg]) :- !, + rest_to_json(Rest, Msg). +resources_to_json(Arg, [Triple|Ts], N, [json(JSON)|Vs]) :- + N1 is N-1, + arg(Arg, Triple, R), + rdf_resource_to_json(R, JSON0), + ( arg(4,Triple,Src:Line) + -> JSON = [src=Src,lineno=Line|JSON0] + ; arg(4,Triple,Src) + -> JSON = [src=Src|JSON0] + ; JSON = JSON0 + ), + resources_to_json(Arg, Ts, N1, Vs). + + +%% rdf_resource_to_json(+Resource, +Ps, -JSON) +% +% Convert an RDF Resource to a JSON term and the properties from +% Ps. + +rdf_resource_to_json(R, [], JSON) :- !, + rdf_resource_to_json(R, JSON). +rdf_resource_to_json(R, Ps, JSON) :- + rdf_resource_to_json(R, Vs0), + iface_resource_properties(R, Ps, Vs1), + append(Vs0, Vs1, JSON). + + +%% rdf_resource_to_json(+Resource, -JSON) +% +% Convert an RDF Resource to a JSON term. + +rdf_resource_to_json(Bool, Object) :- + boolean_to_json(Bool, Boolean), !, % Why did we need this? + Object = [value=Boolean, type=boolean]. + +rdf_resource_to_json(literal(Lit), Object) :- !, + Object = [value=Txt, type=literal|Rest], + literal_to_json(Lit, Txt, Rest). + +rdf_resource_to_json(URI0, Object) :- + rdf_global_id(URI0, URI), + Object = [value=URI, type=Type], + object_uri_type(URI, Type). + + +%% literal_to_json(+Literal, -Text, -Attributes) +% +% Extract text and Attributes from Literal resource. + +literal_to_json(lang(Lang, Txt), Txt, [lang=Lang]) :- !. +literal_to_json(type(Type, Txt0), Txt, [datatype=Type]) :- !, + literal_text(type(Type,Txt0), Txt). % hack to handle XML data +literal_to_json(Txt, Txt, []). + + +%% boolean_to_json(?Bool, ?JSONBool) +% +% JSONBool has an extra @ in front of true or false. + +boolean_to_json(false, @false). +boolean_to_json(true, @true). + + +%% rest_to_json(+Rest, -JSON) +% +% JSON is a json term with information about +% the number of resources that are not in the output. + +rest_to_json(Rest, json(Msg)) :- + length(Rest, C), + Msg = [type=more, value=C]. + + +%% object_uri_type(+URI, -Type) +% +% Type is one of bnode or uri. + +object_uri_type(URI, Type) :- + ( rdf_is_bnode(URI) + -> Type = bnode + ; Type = uri + ). + + +%% resource_to_value(+Resource, -Value) +% +% As keys should be atoms we have to remove the literal +% information and only keep the label. + +resource_to_value(literal(L), Txt) :- !, + literal_text(L, Txt). +resource_to_value(R, R). + + +%% rdf_group_by(+Argument, +Graph, -Pairs) +% +% Pairs contains triples from Graph grouped by +% one of the three arguments. +% @Param Argument the argument to group by 1,2 or 3 + +rdf_group_by(Arg, Graph, Groups) :- + map_list_to_pairs(arg(Arg), Graph, Pairs0), + keysort(Pairs0, Pairs), + group_pairs_by_key(Pairs, Groups). + + + +%% graph_prefixes_to_json(+Graph, -Prefixes) +% +% + +graph_prefixes_to_json(Graph, json(Prefixes)) :- + iface_used_namespaces(Graph, FullNS), + add_ns_abbreviations(FullNS, Prefixes). + +add_ns_abbreviations([], []). +add_ns_abbreviations([NS|T0], [Abbr=NS|T]) :- + rdf_db:ns(Abbr, NS), !, + add_ns_abbreviations(T0, T). +add_ns_abbreviations([_|T0], T) :- + add_ns_abbreviations(T0, T). diff --git a/lib/cluster_search/kwd_search.pl b/lib/cluster_search/kwd_search.pl new file mode 100644 index 0000000..55cc819 --- /dev/null +++ b/lib/cluster_search/kwd_search.pl @@ -0,0 +1,558 @@ +/* This file is part of ClioPatria. + + Author: + HTTP: http://e-culture.multimedian.nl/ + GITWEB: http://gollem.science.uva.nl/git/ClioPatria.git + GIT: git://gollem.science.uva.nl/home/git/ClioPatria.git + GIT: http://gollem.science.uva.nl/home/git/ClioPatria.git + Copyright: 2007, E-Culture/MultimediaN + + ClioPatria is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + ClioPatria is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with ClioPatria. If not, see <http://www.gnu.org/licenses/>. +*/ + +:- module(kwd_search, + [ match_uri/7, % +Atom, +Range, +Threshold, +Map, -URI, -Score, -Path + match_string/7, % +Atom, +Range, +Threshold, +Map, -URI, -Score, -Path + search_string/6, % +Atom, +Range, +Threshold, +Map, -Score, -Path + search_graph/7, % +From, +Range, +Score0, +Threshold, +Map, -Score, -Path + + find_literals/3 % +Search, +Threshold, -Literals + ]). +:- use_module(library(lists)). +:- use_module(library(assoc)). +:- use_module(library(debug)). +:- use_module(library('semweb/rdf_db')). +:- use_module(library('semweb/rdfs')). +:- use_module(library('semweb/rdf_litindex')). +:- use_module(library(porter_stem)). +:- use_module(library(semweb/owl)). +:- use_module(library(find_resource)). +:- use_module(rdf_graph). +:- use_module(rdf_search). +:- use_module(rdf_cluster). +:- use_module(library(semweb/rdf_abstract)). +:- use_module(graph_search). +:- use_module(fuzzy). + + +:- rdf_meta + match_string(+, r, +, +, -), + search_string(+, t, +, +, -, -). + +%% match_string(+Atom, -Matches, +Options) +% +% Find matches for the literal string Atom controlled by Options. +% Options supported are: +% +% * filter(+URI, :Goal) +% Calls Goal to filter target objects. +% +% * score(+Path, -Score, :Goal) +% Calls Goal to rate Path. Score most be unified to a +% float between 0.0 and 1.0. If Goal fails this path is +% discarded. +% +% * score_literal(+Literal, -Score, :Goal) +% Score the initial literal match. +% +% * threshold(Float) +% Float in the range 0.0..1.0, stopping the search if +% the score of a path is below Float. +% +% * threshold(Max, Min, MinHits) +% Perform iterative lowering of threshold, starting at +% Max and stopping at Min or after at least MinHits +% hits have been found. +% +% Example +% +% == +% match_string(gogh, Matches, +% [ filter(URI, rdfs_individual_of(URI, ulan:'Person')) +% score([rdfs:label|_], 1, true) +% ]) +% == +% +% @param Matches is a list of hit(URI, Score, Path) terms. + + +%match_string(Word, Matches, Options) :- +% select(threshold(Max, Min, MinHits), Options, Options1), !, +% iterative_threshold(4, Max, Min, Threshold), +% match_string(Word, Matches, [threshold(Threshold)|Options1]), +% length(Matches, HitCount), +% HitCount >= MinHits. +%match_string(Word, Matches, Options) :- +% find_literals(Word, ScoredLiterals, Options), +% search_graph(ScoredLiterals, Matches, Options). + + +%% iterative_threshold(+Max, +Min, +Steps, -Threshold) is nondet. +% +% Generate thresholds, starting at Max and going in Steps equal +% relative steps down to Min. + +%iterative_threshold(Max, Min, Steps, Threshold) :- +% Steps1 is Steps - 1, +% Itv = log(Max) - log(Min), +% between(0, Steps1, Step), +% Threshold is exp(log(Max) - (Itv*Step/Steps1)). + + + +%% match_string(+Atom, +Range, +Threshold, +Map, +%% -URI, -Score, -Path) is nondet. +% +% Search URIs related to the string Atom. +% +% @param Atom Search specification. Will be tokenized. +% @param Range Target objects. Passed to owl_satisfies/2. +% @param Threshold Search cut-off threshold (0..1) +% @param Map List of Pred-Factor. +% @param URI Matching URI +% @param Score Score of the match (0..1) +% @param Path List of Pred-Obj with path from matching literals to URI. +% Note that this does *not* include URI. +% @see search_string/7 +% @tbd Complete description + +match_string(Atom, Range, Threshold, Map, URI, Score, Path) :- + findall(Score1-Path1, + search_string(Atom, all_values_from(Range), Threshold, Map, Score1, Path1), + Pairs0), + join_probabilities(Pairs0, Pairs1), + keysort(Pairs1, Pairs2), + reverse(Pairs2, Pairs3), + member(Score-URI, Pairs3), + + findall(Score2-Path2, member(Score2-[URI|Path2], Pairs0), Paths), + keysort(Paths,Paths0), + reverse(Paths0,[_Score-Path|_Tail]). % TBD Should this _Score not be the result? + + +match_uri(Atom, Range, Threshold, Map, URI, Score, Path) :- + assoc_map(Map, Assoc), + findall(Score1-Path1, + search_graph(Atom, all_values_from(Range), 1, Threshold, Assoc, Score1, Path1), + Pairs0), + join_probabilities(Pairs0, Pairs1), + keysort(Pairs1, Pairs2), + reverse(Pairs2, Pairs3), + member(Score-URI, Pairs3), + + findall(Score2-Path2, member(Score2-[URI|Path2], Pairs0), Paths), + keysort(Paths,Paths0), + reverse(Paths0,[_Score-Path|_Tail]). + +%% search_string(+Atom, +Range, +Threshold, +Map, -Score, -Path) +% +% We return all resources belonging to Range that either have a +% literal attribute from Atom or have a resource with such an +% attribute. Range is a description for owl_satisfies/2. Most +% typical usage is =|all_values_from(Class)|= +% +% Using one_of([Resource]) you can produce all paths leading to a +% specific resource. +% +% @tbd This is an old interface used by old basic search that +% is now hacked to run on top of the new search +% infrastructure. +% +% @param Map List of Pred-Factor. Current strength is multiplied +% by Factor for Pred. If strength becomes lower than +% Threshold the search is stopped. +% @param Path List of [Target, R1-P1, R2-P2, ... literal(X)-PN] + +search_string(Atom, Range, Threshold, _Map, Score, Path) :- + graph_search(Atom, State, + [ filter([owl_satisfies(Range)]), + threshold(Threshold), + prune(true) + ]), + rdf_search_property(State, graph(Graph)), + rdf_search_property(State, targets(Targets)), + member(Score-Target, Targets), + rdf_cluster:search_path(Target, Graph, [_Target|IPath]), + op_pairs(IPath, Path1), + Path = [Target|Path1], + debug(path, 'Path = ~p', [Path]). + +%% op_pairs(+List, -Pairs) is det. +% +% Translate between new path descriptions and the old ones. We +% also used to delete owl:sameAs here, but this is not the proper +% place as we want to highlight the use of sameAs in the +% interface. + +op_pairs([], []). +%op_pairs([P,_O|T0], T) :- +% rdf_equal(P, owl:sameAs), !, +% op_pairs(T0, T). +op_pairs([P,O|T0], [O-P|T]) :- + op_pairs(T0, T). + + + /******************************* + * LITERAL HANDLING * + *******************************/ + +%% find_literals(+SearchFor, -ScoredLiterals:list(Score-Literal), +Options) is det. +% +% Find all literals in the database that are related to SearchFor +% by at least one common stem. Order these literals by the quality +% of the match. Options: +% +% * threshold(+Float) +% If present, ignore literals that match with a score +% lower than threshold. +% +% @tbd Allow for more than stem and "compound term" matches. +% @bug Tokenization only deals with ISO Latin-1 text. + +find_literals(Search, Literals, Options) :- + option(threshold(Threshold), Options, 0.0), + ( rdf_tokenize_literal(Search, Tokens) + -> true + ; Tokens = [Search] % HACK + ), + all_literals(Tokens, Literals0), + sort(Literals0, Literals1), + length(Literals1, NL1), + debug(search, '~D matches', [NL1]), + sort_matches(Literals1, Tokens, Threshold, Literals). + +all_literals(Tokens, Literals) :- + compound_search_tokens(Tokens, Compounds, AllTokens), + list_to_and(AllTokens, LitCond), + rdf_find_literals(LitCond, Ls0), + ( Compounds == [] + -> Literals = Ls0 + ; tokenize_hits(Ls0, Tokenized), + filter_compounds(Compounds, Tokenized, KeyedLiterals), + pairs_values(KeyedLiterals, Literals) + ). + +%% compound_search_tokens(+Tokens, -Compounds:list(list), -AllTokens) +% +% Extract all tokens and the compounds that must be matched. + +compound_search_tokens([], [], []). +compound_search_tokens(['"'|Rest], [Compound|RC], Words) :- + append(Compound, ['"'|T], Rest), + append(Compound, RW, Words), !, + compound_search_tokens(T, RC, RW). +compound_search_tokens([H0|T], RC, [H|RW]) :- + mkmatch(H0, H), + compound_search_tokens(T, RC, RW). + +list_to_and([], true). +list_to_and([One], One) :- !. +list_to_and([H|T], and(H, And)) :- !, + list_to_and(T, And). + +mkmatch(Number, Number) :- + number(Number), !. +mkmatch(Atom, stem(Atom)). + +%% tokenize_hits(+Literals:list(atom), -Keyed:list(list-atom)) is det. + +tokenize_hits([], []). +tokenize_hits([H|T0], [L-H|T]) :- + ( tokenize_atom(H, L) + -> true + ; L = [H] % HACK + ), + tokenize_hits(T0, T). + +filter_compounds([], Literals, Literals). +filter_compounds([C0|C], L0, L) :- + filter_compound(L0, C0, L1), + filter_compounds(C, L1, L). + + +filter_compound([], _, []). +filter_compound([H|T0], Compound, [H|T]) :- + H = TL-_, + append(_, Rest, TL), + same_tokens(Compound, Rest), !, + filter_compound(T0, Compound, T). +filter_compound([_|T0], Compound, T) :- + filter_compound(T0, Compound, T). + + +same_tokens([], _). +same_tokens([H|T0], [H|T]) :- !, + same_tokens(T0, T). +same_tokens([-|T0], T) :- !, + same_tokens(T0, T). +same_tokens(T0, [-|T]) :- !, + same_tokens(T0, T). +same_tokens([H0|T0], [H1|T]) :- + porter_stem(H0, Stem), + porter_stem(H1, Stem), + same_tokens(T0, T). + + +%% sort_matches(+Matches, +Search, +Threshold, -Set) +% +% Sort matches by score, best first. Scoring is done using the +% search target in Search. All scores below Threshold are removed +% from the result-set. +% +% @tbd If Fuzzy is not =off=, score is always 1 and Set is +% the same as Matches. Should be cleaned. + +sort_matches(Set0, Search, Threshold, Set) :- + tag_match_score(Set0, Search, Threshold, Tagged), + keysort(Tagged, Set1), + reverse(Set1, Set). + + +%% tag_match_score(+Set:list(atom), +Search, +Threshold, -Result:list(Score-Atom)) +% +% Match literal scores on how well they match Search. Matches +% below Threshold are deleted from the result. + +tag_match_score([], _, _, []). +tag_match_score([H|T0], Search, Threshold, L) :- + match_score(Search, H, S), + %debug(search, 'Score ~w~n', [S]), + ( S >= Threshold + -> L = [S-H|T] + ; L = T + ), + tag_match_score(T0, Search, Threshold, T). + + +%% match_score(+Search, +Literal, -Score) is det. +% +% Determine quality of the score. There are two cases. Literal is +% the same or about the same as Search and Literal is a long +% literal and Search only provides a few stems or words in +% Literal. In the first case we use the `minimal edit' algorithm +% from fuzzy.pl. In the latter case our score depends on: +% +% * Percentage of tokens from search found in Literal +% * Whether they are stem or perfect matches +% * Longest adjacent sequence (stem-based) appear +% +% Values are normalised to 0...1, where 1 is a perfect match. + +match_score(Search, Literal, Score) :- + tokens(Search, SearchTokens), + tokens(Literal, LiteralTokens), + ( SearchTokens == LiteralTokens + -> Score = 1 + ; literal_distance(SearchTokens, LiteralTokens, LD) + -> Score is 3/(3+LD) + ; add_stems(SearchTokens, SearchStems), + add_stems(LiteralTokens, LitStems), + match_score2(SearchStems, LitStems, Score) + ). + +tokens(In, Tokens) :- + atom(In), !, + rdf_tokenize_literal(In, Tokens). +tokens(In, In) :- + assertion(is_list(In)). + +match_score2(Search, Lit, Score) :- + same_by_stem(Search, Lit), !, + Score = 0.95. +match_score2(Search, Lit, Score) :- + phrase(matches(Search, 1, Lit), Matches), + match_count(Matches, Count), + length(Lit, Len), + Score is min(1, Count/Len). + + +same_by_stem([], []). +same_by_stem([_-S|T0], [_-S|T]) :- + same_by_stem(T0, T). + + +%% matches(+For:list(W-S), +Start, +In:list(W-S))// +% +% Produces a list of m(Match, PosIn, PosFor), where Match is 1 for +% exact matches and 0.8 it the stem matches. + +matches([], _, _) --> + []. +matches([W-S|T], I, Lit) --> + match(Lit, I, 1, W, S), + { I2 is I + 1 }, + matches(T, I2, Lit). + +match([], _, _, _, _) --> + []. +match([LW-S|T], I, N, W, S) --> !, + ( { W == LW } + -> [ m(1, I,N) ] + ; [ m(0.8,I,N) ] + ), + { N2 is N + 1 }, + match(T, I, N2, W, S). +match([_|T], I, N, W, S) --> + { N2 is N + 1 }, + match(T, I, N2, W, S). + + +%% match_count(+Matches, -Score) is det. +% +% Sum the found matches. The idea was to give extra credit to +% contiguous chunks, but I cannot find that in the code. A bug? + +match_count([], 0). +match_count([m(S,I,N)|T], Score) :- + sequence(I, N, T, T1, S, Score1), + match_count(T1, Score2), + Score is Score1 + Score2. + +sequence(I, N, T0, T, S, Score) :- + I2 is I + 1, + N2 is N + 1, + select(m(S2,I2,N2), T0, T1), !, + Score1 is S+S2, + sequence(I2, N2, T1, T, Score1, Score). +sequence(_, _, T, T, S, S). + +%% add_stems(+Words:list(atom), -Stemmed:list(Word-Stem)) is det. +% +% @tbd Deal with non-iso-latin-1 text + +add_stems([], []). +add_stems([H|T0], [H-Stem|T]) :- + ( porter_stem(H, Stem) + -> true + ; Stem = H + ), + add_stems(T0, T). + + + /******************************* + * SEARCH * + *******************************/ + +%% assoc_map(+List, -Assoc) +% +% Translate P=F, ... list into an assoc, turning all NS:Local into +% global predicates. + +assoc_map(List, Assoc) :- + empty_assoc(Assoc0), + assoc_map(List, Assoc0, Assoc). + +assoc_map([], Assoc, Assoc). +assoc_map([P=F|T], Assoc0, Assoc) :- + rdf_global_id(P, G), + put_assoc(G, Assoc0, F, Assoc1), + assoc_map(T, Assoc1, Assoc). + + +%% search_graph(+From, +Range, +Score0, +Threshold, +Map, -Score, -Path) +% +% Search the RDF graph starting at From, looking for an object for +% which owl_satisfies(O, Range) succeeds. Score0 is the initial +% score. The search stops if the score is below Threshold. Map is +% an assoc from predicate to a number between 0, and 1. If a +% predicate is not in the map, the system looks for a +% super-predicate that is in the map. If that fails it looks for +% `default'. +% +% Score is the final score of the path and Path is a list that +% starts with the found target. Other elements in the list are +% pairs of the format "Object-Predicate". So, the path +% +% [T, R1-P1, R2-P2] +% +% is created from a search R2 <-P2-> R1 <-P1-> T + +search_graph(From, Range, Score0, Threshold, Map, Score, Path) :- + empty_assoc(Done), + search_graph(From, Range, Score0, Threshold, Map, Done, + Score, Path0), + reverse(Path0, Path). + +search_graph(Object, Range, Score, _, _, _, Score, Path) :- + owl_satisfies(Range, Object), + Path = [Object]. + +search_graph(Object, Range, Score0, Threshold, Map, Done, + Score, [Object-P|Path]) :- + ( rdf_has(O2, P, Object) + ; atom(Object), + rdf_has(Object, IP, O2), + rdf_has(P, owl:inverseOf, IP) + ), + \+ get_assoc(O2, Done, _), + + update_score(Score0, P, Map, Map1, Score1), + Score1 >= Threshold, + put_assoc(O2, Done, true, Done1), + search_graph(O2, Range, Score1, Threshold, Map1, Done1, Score, Path). + + +update_score(Score0, P, Map, Map, Score) :- + get_assoc(P, Map, F), !, + Score is Score0*F. +update_score(Score0, P, Map0, Map, Score) :- + ( rdfs_subproperty_of(P, Super), + get_assoc(Super, Map0, F) + -> true + ; get_assoc(default, Map0, F) + ), + put_assoc(P, Map0, F, Map), + Score is Score0*F. + + + + /******************************* + * JOIN PROBABILITIES * + *******************************/ + +%% join_probabilities(+Paths:list(Prop-Path), +%% -URIs:list(Prop-URI)) is det. +% +% Given a list of pairs Probability-Path, create a list +% Probability-URI, merging the probabilities of each path that +% leads to the same final resource. + +join_probabilities(P, Merged) :- + key_prop_by_uri(P, Keyed), + keysort(Keyed, Sorted), + join_by_uri(Sorted, Merged). + +key_prop_by_uri([], []). +key_prop_by_uri([P-[URI|_]|T0], [URI-P|T]) :- + key_prop_by_uri(T0, T). + +join_by_uri([], []). +join_by_uri([URI-P0|T0], [P-URI|T]) :- + same_uri(URI, T0, Ps, T1), + joined_probability([P0|Ps], P), + join_by_uri(T1, T). + +same_uri(URI, [URI-P|T0], [P|TP], T) :- !, + same_uri(URI, T0, TP, T). +same_uri(_, T, [], T). + +joined_probability(List, Joined) :- + sum_p_no(List, Sum), + Joined is 1 - Sum. + +sum_p_no([], 1). +sum_p_no([H|T], P) :- + sum_p_no(T, P0), + P is P0 * (1-H). + diff --git a/lib/cluster_search/owl_ultra_lite.pl b/lib/cluster_search/owl_ultra_lite.pl new file mode 100644 index 0000000..d440a45 --- /dev/null +++ b/lib/cluster_search/owl_ultra_lite.pl @@ -0,0 +1,224 @@ +/* This file is part of ClioPatria. + + Author: + HTTP: http://e-culture.multimedian.nl/ + GITWEB: http://gollem.science.uva.nl/git/ClioPatria.git + GIT: git://gollem.science.uva.nl/home/git/ClioPatria.git + GIT: http://gollem.science.uva.nl/home/git/ClioPatria.git + Copyright: 2007, E-Culture/MultimediaN + + ClioPatria is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + ClioPatria is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with ClioPatria. If not, see <http://www.gnu.org/licenses/>. +*/ + +:- module(owl_ultra_lite, + [ owl/3, + owl/4, + owl_subj/4, + owl_similar/3, + owl_similar/4, + owl_inv/3, + owl_inv/4, + owl_same_inv/3, + owl_same_inv/4, + same/2, + similar/2, + inverse_predicate/2, % +P, -IP + equivalence_set/2 % +URI, -EquivalenceSet + ]). + +:- use_module(library(assoc)). +:- use_module(library(error)). +:- use_module(library(semweb/rdf_db)). + + +rdf_optimise:rdf_db_goal(owl(S,P,O), S,P,O). +rdf_optimise:subj_branch_factor(owl(_,_,_), X, rdfs_subject_branch_factor(X)). +rdf_optimise:obj_branch_factor(owl(_,_,_), X, rdfs_object_branch_factor(X)). + +rdf_optimise:rdf_db_goal(owl(S,P,O,_), S,P,O). +rdf_optimise:subj_branch_factor(owl(_,_,_,_), X, rdfs_subject_branch_factor(X)). +rdf_optimise:obj_branch_factor(owl(_,_,_,_), X, rdfs_object_branch_factor(X)). + + +:- rdf_meta + owl(r,r,o), + owl(r,r,o,?), + owl_inv(r,r,o), + owl_inv(r,r,o,?), + owl_same_inv(r,r,o), + owl_same_inv(r,r,o,?), + owl_similar(r,r,o), + owl_similar(r,r,o,?), + inverse_predicate(r,r). + + +%% owl(?S, ?P, ?O, ?RealP) is nondet. +% +% True if rdf_has(S1,P,O1,RealP) is true and S,S1 as well as O,O1 are +% reachable using owl:sameAs. + +owl(S,P,O) :- + owl(S,P,O,_). + +owl(S,P,O,P) :- %sorry we can't do subproperty here + atom(P), + rdf_equal(owl:sameAs, P), !, + same(S,O). + +owl(S,P,O,SP) :- + instantiated(S,O,I), + owl(I,S,P,O,SP). + +owl_subj(S,P,V,Value) :- + same(V, Value), + rdf_has(S, P, Value). + +instantiated(S,O,I) :- + ( atom(S) -> I0 = 0b10 ; I0 = 0b00 ), + ( atom(O) -> I is I0\/0b01 ; I is I0\/0b00 ). + +owl(0b00, S,P,O,SP) :- rdf_value(S,P,O,SP). +owl(0b10, S,P,O,SP) :- same(S, S0), rdf_value(S0,P,O,SP). +owl(0b01, S,P,O,SP) :- same(O, O0), rdf_value(S,P,O0,SP). +owl(0b11, S,P,O,SP) :- same(S, S0), same(O, O0), rdf_value(S0,P,O0,SP). + +rdf_value(S,P,O,SP) :- + rdf_has(S,P,O,SP). +rdf_value(S,P,O,SP) :- + nonvar(O), !, + rdf(S1,rdf:value,O), + rdf_has(S,P,S1,SP). + + +%% owl_similar(?S, ?P, ?O, ?RealP) is nondet. +% +% Behaves the same as owl/3, but also uses skos:exactMatch. + +owl_similar(S,P,O) :- + owl_similar(S,P,O,_). + +owl_similar(S,P,O,P) :- %sorry we can't do subproperty here + atom(P), + ( rdf_equal(owl:sameAs, P) + ; rdf_equal(skos:exactMatch, P) + ), !, + similar(S,O). + +owl_similar(S,P,O,SP) :- + instantiated(S,O,I), + owl_similar(I,S,P,O,SP). + +owl_similar(0b00, S,P,O,SP) :- rdf_value(S,P,O,SP). +owl_similar(0b10, S,P,O,SP) :- similar(S, S0), rdf_value(S0,P,O,SP). +owl_similar(0b01, S,P,O,SP) :- similar(O, O0), rdf_value(S,P,O0,SP). +owl_similar(0b11, S,P,O,SP) :- similar(S, S0), similar(O, O0), rdf_value(S0,P,O0,SP). + + + +%% owl_inv(?S, ?P, ?V, ?SP) +% +% As rdf_has/4 but include inverse properties + +owl_inv(S, P, O) :- + owl_inv(S, P, O, _). + +owl_inv(S, P, O, SP) :- + rdf_has(S, P, O, SP). +owl_inv(S, P, O, SP) :- + ground(P), !, + inverse_predicate(P, IP), + rdf_has(O, IP, S, SP). + + +%% owl_same_inv(-S,-P,-V) +% +% As rdf_has/3 but include inverse properties + +owl_same_inv(S, P, O) :- + owl_same_inv(S, P, O, _). + +owl_same_inv(S, P, O, Src) :- + owl(S, P, O, Src). +owl_same_inv(S, P, O, SP) :- + ground(P), !, + inverse_predicate(P, IP), + owl(O, IP, S, SP). + + +%% same(+R0, -R) is nondet. +%% same(-R0, +R) is nondet. +% +% True if R is R0 or reachable through owl:sameAs relations. + +same(R0, R) :- + atom(R0), !, + empty_assoc(V0), + put_assoc(R0, V0, true, V), + same(R0, R, V). +same(R0, R) :- + atom(R), !, + same(R, R0). +same(R0, _R) :- + instantiation_error(R0). + +same(R, R, _). +same(R0, R, V) :- + ( rdf_has(R0, owl:sameAs, R1) + ; rdf_has(R1, owl:sameAs, R0) + ), + \+ get_assoc(R1, V, true), + put_assoc(R1, V, true, V2), + same(R1, R, V2). + + +%% similar(+R0, -R) is nondet. +% +% True if R is R0 or reachable through owl:sameAs or +% skos:exactMatch relations. + +similar(R0, R) :- + empty_assoc(V0), + put_assoc(R0, V0, true, V), + similar(R0, R, V). + +similar(R, R, _). +similar(R0, R, V) :- + ( rdf_has(R0, owl:sameAs, R1) + ; rdf_has(R1, owl:sameAs, R0) + ; rdf_has(R0, skos:exactMatch, R1) + ; rdf_has(R1, skos:exactMatch, R0) + ), + \+ get_assoc(R1, V, true), + put_assoc(R1, V, true, V2), + similar(R1, R, V2). + +%% inverse_predicate(+P1, +P2) is semidet. +% +% True if P1 and P2 are each others inverses. + +inverse_predicate(P1, P2) :- + rdf_has(P1, owl:inverseOf, P2), !. +inverse_predicate(P1, P2) :- + rdf_has(P2, owl:inverseOf, P1), !. +inverse_predicate(P, P) :- + rdf(P, rdf:type, owl:'SymmetricProperty'). + + +%% equivalence_set(+R, -Set) +% +% Set contains R and all its equivalent resources. + +equivalence_set(R, Set) :- + findall(S, same(R, S), Set). + diff --git a/lib/cluster_search/parameters.pl b/lib/cluster_search/parameters.pl new file mode 100644 index 0000000..603e415 --- /dev/null +++ b/lib/cluster_search/parameters.pl @@ -0,0 +1,231 @@ +/* This file is part of ClioPatria. + + Author: Michiel Hildebrand + HTTP: http://e-culture.multimedian.nl/ + GITWEB: http://gollem.science.uva.nl/git/ClioPatria.git + GIT: git://gollem.science.uva.nl/home/git/ClioPatria.git + GIT: http://gollem.science.uva.nl/home/git/ClioPatria.git + Copyright: 2007, E-Culture/MultimediaN + + ClioPatria is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + ClioPatria is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with ClioPatria. If not, see <http://www.gnu.org/licenses/>. +*/ + +:- module(parameters, + [ http_options/3, % :Goal, -Options, -Parameters + json_options_to_terms/2, % +OptionListIn, -OptionListOut + json_to_term/2, % +JSONInput, -Term + json_to_resource/2 % +JSON, -RDFResource + ]). + +:- use_module(library(http/json_convert)). +:- use_module(library(http/json)). +:- use_module(library(http/http_parameters)). + +:- use_module(library(semweb/rdf_db)). + +/** <module> Utilities for fetching HTTP request parameters +*/ + + +:- meta_predicate + http_options(4, -, -). + +%% http_options(:OptionType, -Options, -HTTPParameters) is det. +% +% Options are all option clauses defined for OptionType. +% HTTPParameters is a specification list for http_parameters/3. +% OptionType is called as: +% +% OptionType(Name, Type, Default, Description) +% +% Name is the name of the HTTP parameter (and the option), Type is +% a type as supported by http_parameters/3. If Default is given, +% it acts as a default value for the option. Otherwise the +% parameter is optional. Description is used by the help system. + +http_options(F, Options, Parameters) :- + findall(O-P, + ( call(F, Name, Type, Default, Description), + O =.. [Name,Value], + pterm(Name,Value,Type,Default,Description,P) + ), + Pairs), + pairs_keys_values(Pairs, Options, Parameters0), + make_group(F, Parameters0, Parameters). + +has_groups :- + clause(http_parameters:fill_parameter(P, _, _), _Body), + nonvar(P), + P = group(_,_), !. + +:- if(has_groups). +make_group(M:F, Parameters, [group(Parameters, [generated(M:Name/Arity)])]) :- + functor(F, Name, A0), + Arity is A0 + 4. +:- else. +make_group(_, Parameters, Parameters). +:- endif. + + + +pterm(Name, Value, Type, Default, Description, P) :- + P =.. [Name, Value, Properties], + phrase(ptype(Type, IsList), Properties, P2), + phrase(pdef(IsList, Default), P2, P3), + phrase(pdesc(Description), P3). + +%% ptype(+Type, -IsList)// is det. +% +% Translate our single type specification into an option list for +% a parameter in http_parameters/3. +% +% @tbd http_parameters/3 has been extended in 5.7.14. We do not +% use the full potential yet. + +ptype(zero_or_more(Type), list) --> !, + [ zero_or_more, Type ]. +ptype(zero_or_more, list) --> !, + [ zero_or_more ]. +ptype(list(Type), list) --> !, + ( { pl5714 } + -> [ list(Type) ] + ; [ zero_or_more, Type ] + ). +ptype(nonneg, nolist) --> + { \+ pl5714 }, !, + [ integer ]. +ptype(atom, nolist) --> !. +ptype(Type, nolist) --> + [ Type ]. + +pl5714 :- + clause(http_parameters:check_type3(nonneg, _, _), _Body). + +pdef(list, _) --> !. +pdef(_, Default) --> + { nonvar(Default) }, !, + [ default(Default) ]. +pdef(_, _) --> + [ optional(true) ]. + +pdesc(Description) --> + { atom(Description), + Description \== '' + }, !, + [ description(Description) ]. +pdesc(_) --> []. + + /******************************* + * HOOKS * + *******************************/ + +:- multifile + http_help:evaluate/1, + http:convert_parameter/3. + +http_help:evaluate(http_options(_,_,_)). + +%% http:convert_parameter(+Type, +In, -URI) is semidet. +% +% HTTP parameter conversion for the following types: +% +% * uri +% This conversion accepts NS:Local and absolute URIs. +% +% * json(+Module) +% Input is JSON and translated into a Prolog term using +% declarations in Module. + +http:convert_parameter(uri, In, URI) :- + ( sub_atom(In, B, _, A, :), + sub_atom(In, _, A, 0, Local), + xml_name(Local) + -> sub_atom(In, 0, B, _, NS), + rdf_global_id(NS:Local, URI) + ; is_absolute_url(In) + -> URI = In + ). +http:convert_parameter(json(Module), Atom, Term) :- + atom_json_term(Atom, JSON, []), + json_to_prolog(JSON, Module:Term). + + + /******************************* + * JSON * + *******************************/ + +%% json_options_to_terms(+OptionListJsonAtoms, -OptionList) +% +% Convert the option values that are an atom containing json +% to prolog terms. +% + +json_options_to_terms([], []). +json_options_to_terms([Option0|T], [Option1|Rest]) :- + Option0 =.. [F,V], + Option1 =.. [F,Term], + json_to_term(V, Term), + json_options_to_terms(T, Rest). + + +%% json_to_term(+JSONInput, -Term) +% +% Term is the prolog term corresponding +% with the JSON written from an input stream. + +json_to_term(List, TermList) :- + is_list(List), !, + json_list_to_term(List, TermList). +json_to_term(Atom, Term) :- + % make sure we have a json term + % this is required because atom_json_term + % fails otherwise + ( sub_atom(Atom, 0, 1, _, '{') + ; sub_atom(Atom, 0, 1, _, '[') + ), !, + atom_json_term(Atom,JSON,[]), + simple_json_to_prolog(JSON, Term). +json_to_term(Atom, Atom). + +json_list_to_term([], []). +json_list_to_term([H|T], [Term|Rest]) :- + json_to_term(H, Term), + json_list_to_term(T, Rest). + + +simple_json_to_prolog(json(Pairs), Terms) :- !, + simple_json_pairs_to_prolog(Pairs, Terms). +simple_json_to_prolog(Key=JSON, Key=Value) :- !, + simple_json_to_prolog(JSON, Value). +simple_json_to_prolog(Atom, Atom). + +simple_json_pairs_to_prolog([], []). +simple_json_pairs_to_prolog([Pair|Ps], [Term|Ts]) :- + simple_json_to_prolog(Pair, Term), + simple_json_pairs_to_prolog(Ps, Ts). + + +json_to_resource(List, Resource) :- + atom_list_to_json_term(List, JSON), + json_to_prolog(JSON, Resource). + +atom_list_to_json_term([], []). +atom_list_to_json_term([H|T], [Term|Rest]) :- + atom_json_term(H, Term, []), + atom_list_to_json_term(T, Rest). + +:- json_object + literal(value:_) + [type=literal], + type(type:atom, literal:atom), + lang(lang:atom, literal:atom). diff --git a/lib/cluster_search/rdf_abstract.pl b/lib/cluster_search/rdf_abstract.pl new file mode 100644 index 0000000..8f5672c --- /dev/null +++ b/lib/cluster_search/rdf_abstract.pl @@ -0,0 +1,1128 @@ +/* This file is part of ClioPatria. + + Author: Jan Wielemaker <wielemak@science.uva.nl> + HTTP: http://e-culture.multimedian.nl/ + GITWEB: http://gollem.science.uva.nl/git/ClioPatria.git + GIT: git://gollem.science.uva.nl/home/git/ClioPatria.git + GIT: http://gollem.science.uva.nl/home/git/ClioPatria.git + Copyright: 2007, E-Culture/MultimediaN + + ClioPatria is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + ClioPatria is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with ClioPatria. If not, see <http://www.gnu.org/licenses/>. +*/ + +:- module(rdf_abstract, + [ merge_sameas_graph/2, % +GraphIn, -GraphOut + merge_sameas_graph/3, % +GraphIn, -GraphOut, +Options + bagify_graph/4, % +GraphIn, -GraphOut, -Bags, +Options + graph_resources/2, % +Graph, -Resources + abstract_graph/3, % +GraphIn, -GraphOut, +Options + concept_of/2 % +Resource, -Concept + ]). +:- use_module(library(semweb/rdf_db)). +:- use_module(library(semweb/rdfs)). +:- use_module(library(assoc)). +:- use_module(library(option)). +:- use_module(library(pairs)). +:- use_module(library(ordsets)). +:- use_module(library(debug)). +:- use_module(library(apply)). +:- use_module(library(lists)). +:- use_module(library(settings)). + +/** <module> Abstract RDF graphs + +The task of this module is to do some simple manipulations on RDF graphs +represented as lists of rdf(S,P,O). Supported operations: + + * merge_sameas_graph(+GraphIn, -GraphOut, +Options) + Merge nodes by owl:sameAs + + * bagify_graph(+GraphIn, -GraphOut, -Bags, +Options) + Bagify a graph, returning a new graph holding bags of resources + playing a similar role in the graph. + + * abstract_graph(+GraphIn, -GraphOut, +Options) + Abstract nodes or edges using rdf:type, rdfs:subClassOf and/or + rdfs:subPropertyOf +*/ + +%% merge_sameas_graph(GraphIn, GraphOut) is det. +%% merge_sameas_graph(GraphIn, GraphOut, +Options) is det. +% +% Collapse nodes in GraphIn that are related through an identity +% mapping. By default, owl:sameAs is the identity relation. +% Options defines: +% +% * predicate(-PredOrList) +% Use an alternate or list of predicates that are to be +% treated as identity relations. +% +% * sameas_mapped(-Assoc) +% Assoc from resources to the resource it was mapped to. + +:- rdf_meta + merge_sameas_graph(+, -, t). + +merge_sameas_graph(GraphIn, GraphOut) :- + merge_sameas_graph(GraphIn, GraphOut, []). +merge_sameas_graph(GraphIn, GraphOut, Options) :- + sameas_spec(Options, SameAs), + sameas_map(GraphIn, SameAs, Assoc), % R->EqSet + ( empty_assoc(Assoc) + -> GraphOut = GraphIn, + empty_assoc(EqMap) + ; assoc_to_list(Assoc, List), + pairs_values(List, EqSets), + sort(EqSets, UniqueEqSets), + map_list_to_pairs(rdf_representative, UniqueEqSets, Keyed), % Repr-EqSet + representer_map(Keyed, EqMap), + map_graph(GraphIn, EqMap, GraphOut), + ( debugging(abstract) + -> length(GraphIn, Before), + length(GraphOut, After), + debug(abstract, 'owl:sameAs reduction: ~D --> ~D edges', [Before, After]) + ; true + ) + ), + option(sameas_mapped(EqMap), Options, _). + +sameas_spec(Options, SameAs) :- + rdf_equal(owl:sameAs, OwlSameAs), + option(predicate(SameAs0), Options, OwlSameAs), + ( is_list(SameAs0) + -> SameAs = SameAs0 + ; SameAs = [SameAs0] + ). + +%% sameas_map(+Graph, +SameAs, -Map:assoc) is det. +% +% Create an assoc with R->Set, where Set contains an ordered set +% of resources equivalent to R. + +sameas_map(Graph, SameAs, Assoc) :- + empty_assoc(Assoc0), + sameas_map(Graph, SameAs, Assoc0, Assoc). + +sameas_map([], _, Assoc, Assoc). +sameas_map([rdf(S, P, O)|T], SameAs, Assoc0, Assoc) :- + same_as(P, SameAs), + S \== O, !, + ( get_assoc(S, Assoc0, SetS) + -> ( get_assoc(O, Assoc0, SetO) + -> ord_union(SetO, SetS, Set) + ; ord_union([O], SetS, Set) + ) + ; ( get_assoc(O, Assoc0, SetO) + -> ord_union([S], SetO, Set) + ; sort([S,O], Set) + ) + ), + putall(Set, Assoc0, Set, Assoc1), + sameas_map(T, SameAs, Assoc1, Assoc). +sameas_map([_|T], SameAs, Assoc0, Assoc) :- + sameas_map(T, SameAs, Assoc0, Assoc). + +putall([], Assoc, _, Assoc). +putall([H|T], Assoc0, Value, Assoc) :- + put_assoc(H, Assoc0, Value, Assoc1), + putall(T, Assoc1, Value, Assoc). + + +%% same_as(+Predicate:resource, +SameAs:list) is semidet. +% +% True if Predicate expresses a same-as mapping. + +same_as(P, Super) :- + member(S, Super), + rdfs_subproperty_of(P, S), !. + + +%% representer_map(+List:list(Repr-Set), -Assoc) is det. +% +% Assoc maps all elements of Set to its representer. + +representer_map(Keyed, EqMap) :- + empty_assoc(Assoc0), + representer_map(Keyed, Assoc0, EqMap). + +representer_map([], Assoc, Assoc). +representer_map([R-Set|T], Assoc0, Assoc) :- + putall(Set, Assoc0, R, Assoc1), + representer_map(T, Assoc1, Assoc). + + + /******************************* + * BAGIFY * + *******************************/ + +%% bagify_graph(+GraphIn, -GraphOut, -Bags, +Options) is det. +% +% If a graph contains multiple objects of the same type (class) in +% the same location in the graph (i.e. all links are the same), +% create a *bag*. The bag is represented by a generated resource +% of type rdf:Bag and the RDF for the bags is put in Bags. I.e. +% appending GraphOut and Bags provides a proper RDF model. Options +% provides additional abstraction properties. In particular: +% +% * class(+Class) +% Try to bundle objects under Class rather than their +% rdf:type. Multiple of these options may be defined +% +% * property(+Property) +% Consider predicates that are an rdfs:subPropertyOf +% Property the same relations. +% +% * bagify_literals(+Bool) +% If =true= (default), also try to put literals into a +% bag. Works well to collapse non-preferred labels. +% +% @tbd Handle the property option + +:- rdf_meta + bagify_graph(+, -, -, t). + +bagify_graph(GraphIn, GraphOut, Bags, Options) :- + canonise_options(Options, Options1), + partition_options(class, Options1, ClassOptions, Options2), + graph_node_edges(GraphIn, AssocNodesToEdges, Options2), + assoc_to_list(AssocNodesToEdges, NodesToEdges), + pairs_keys(NodesToEdges, Nodes), + group_resources_by_class(Nodes, ByClass, ClassOptions), + resource_bags(ByClass, NodesToEdges, RawBags), + ( debugging(abstract) + -> length(RawBags, Len), + maplist(length, RawBags, BagLens), + sumlist(BagLens, ObjCount), + debug(abstract, 'Created ~D bags holding ~D objects', [Len, ObjCount]) + ; true + ), + assign_bagids(RawBags, IDBags), + representer_map(IDBags, Assoc), + map_graph(GraphIn, Assoc, GraphOut0), + merge_properties(GraphOut0, GraphOut, Options2), + make_rdf_graphs(IDBags, Bags). + +partition_options(Name, Options, WithName, WithoutName) :- + partition(option_name(Name), Options, WithName, WithoutName). + +option_name(Name, Option) :- + functor(Option, Name, 1). + +%% canonise_options(+OptionsIn, -OptionsOut) is det. +% +% Rewrite option list from possible Name=Value to Name(Value) + +canonise_options(In, Out) :- + memberchk(_=_, In), !, % speedup a bit if already ok. + canonise_options2(In, Out). +canonise_options(Options, Options). + +canonise_options2([], []). +canonise_options2([Name=Value|T0], [H|T]) :- !, + H =.. [Name,Value], + canonise_options2(T0, T). +canonise_options2([H|T0], [H|T]) :- !, + canonise_options2(T0, T). + + + + +%% group_resources_by_class(+Resources, -ByClass, +Options) is det. +% +% ByClass is a list of lists of resources that belong to the same +% class. First step we process the classes specified in Options. + +group_resources_by_class([], [], _) :- !. +group_resources_by_class(Resources, ByClass, Options) :- + select_option(class(Class), Options, Options1), !, + ( partition(has_class(sub_class, Class), Resources, InClass, NotInClass), + InClass \== [] + -> ByClass = [InClass|ByClass1], + group_resources_by_class(NotInClass, ByClass1, Options1) + ; group_resources_by_class(Resources, ByClass, Options1) + ). +group_resources_by_class([H|T0], [[H|S]|T], Options) :- + class_of(H, exact, Class), + partition(has_class(exact, Class), T0, S, T1), + group_resources_by_class(T1, T, Options). + +%% has_class(+Match, +Class, +Node) is semidet. + +has_class(Match, Class, Node) :- + class_of(Node, Match, Class). + +%% class_of(+Node, +Match, -Class) is det. +%% class_of(+Node, +Match, +Class) is semidet. + +class_of(Node, sub_class, Class) :- !, + rdfs_individual_of(Node, Class), !. +class_of(literal(_), exact, Literal) :- !, + rdf_equal(Literal, rdfs:'Literal'). +class_of(R, exact, Class) :- + rdf_has(R, rdf:type, Class), !. +class_of(_, exact, Class) :- + rdf_equal(Class, rdfs:'Resource'). + + +%% resource_bags(+ByClass:list(list(resource)), +%% +NodeToEdges:list(node-list(edges)), +%% -RawBags:list(list(resource))) is det. +% +% Find bags of resources that have the same connections. + +resource_bags(ByClass, NodeToEdges, Bags) :- + phrase(resource_bags(ByClass, NodeToEdges), Bags). + +resource_bags([], _) --> + []. +resource_bags([ByClassH|ByClassT], NodeToEdges) --> + { sort(ByClassH, SortedNodes), + ord_subkeys(SortedNodes, NodeToEdges, SubNodeToEdges), + same_edges(SubNodeToEdges, Bags) + }, + Bags, + resource_bags(ByClassT, NodeToEdges). + +%% ord_subkeys(+Keys, +Pairs, -SubPairs) is det. +% +% SubPairs is the sublist of Pairs with a key in Keys. +% +% @param Keys Sorted list of keys +% @param Pairs Key-sorted pair-list +% @param SubPairs Key-sorted pair-list + +ord_subkeys([], _, []). +ord_subkeys([K|KT], [P|PT], Pairs) :- + P = PK-_, + compare(Diff, K, PK), + ord_subkeys(Diff, K, KT, P, PT, Pairs). + +ord_subkeys(=, _, KT, P, PT, [P|Pairs]) :- !, + ord_subkeys(KT, PT, Pairs). +ord_subkeys(<, _, [K|KT], P, PT, Pairs) :- + P = PK-_, + compare(Diff, K, PK), + ord_subkeys(Diff, K, KT, P, PT, Pairs). +ord_subkeys(>, K, KT, _, [P|PT], Pairs) :- + P = PK-_, + compare(Diff, K, PK), + ord_subkeys(Diff, K, KT, P, PT, Pairs). + + +%% same_edges(+NodeToEdges:list(node-edges), -Bags:list(list)) is det. +% +% Bags is a list of lists of resources (nodes) that share the same +% (abstracted) edges with the rest of the graph. + +same_edges(NodeToEdges, Bags) :- + transpose_pairs(NodeToEdges, ByEdges), % list(edges-node) + keysort(ByEdges, Sorted), + group_pairs_by_key(Sorted, Grouped), + pairs_values(Grouped, AllBySameEdge), + include(longer_than_one, AllBySameEdge, Bags). + +longer_than_one([_,_|_]). + +%% graph_node_edges(+Graph, -NodeEdges:assoc, +Options) is det. +% +% NodeEdges is an assoc from resource to a sorted list of involved +% triples. Only subject and objects are considered. +% +% Processes =bagify_literals= and =property= options + +graph_node_edges(Graph, Assoc, Options) :- + option(bagify_literals(LitToo), Options, true), + property_map(Options, Map0), + empty_assoc(Assoc0), + graph_node_edges(Graph, LitToo, Map0, Assoc0, Assoc1), + map_assoc(sort, Assoc1, Assoc). + +graph_node_edges([], _, _, Assoc, Assoc). +graph_node_edges([rdf(S,P,O)|T], LitToo, Map, Assoc0, Assoc) :- + abstract_property(P, Map, SP, Map1), + add_assoc(S, Assoc0, rdf(-, SP, O), Assoc1), + ( (atom(O) ; LitToo == true ) + -> add_assoc(O, Assoc1, rdf(S, SP, -), Assoc2) + ; Assoc2 = Assoc1 + ), + graph_node_edges(T, LitToo, Map1, Assoc2, Assoc). + +add_assoc(Key, Assoc0, Value, Assoc) :- + get_assoc(Key, Assoc0, Old, Assoc, [Value|Old]), !. +add_assoc(Key, Assoc0, Value, Assoc) :- + put_assoc(Key, Assoc0, [Value], Assoc). + + +%% property_map(+Options, -Map:assoc(P-Super)) +% +% Process the options, creating a map that replaces a property by +% its registered super. + +property_map(Options, Map) :- + empty_assoc(Map0), + property_map(Options, Map0, Map). + +property_map([], Map, Map). +property_map([property(P)|T], Map0, Map) :- !, + ( rdfs_subproperty_of(P, Super), + get_assoc(Super, Map0, Root) + -> put_assoc(P, Map0, Root, Map1) + ; put_assoc(P, Map0, P, Map1) + ), + property_map(T, Map1, Map). +property_map([_|T], Map0, Map) :- + property_map(T, Map0, Map). + +%% abstract_property(+P0, +Map0, -P, -Map) is det. +% +% Find the abstract property for some property P. + +abstract_property(P0, Map0, P, Map) :- + get_assoc(P0, Map0, P), !, + Map = Map0. +abstract_property(P, Map0, Root, Map) :- + rdfs_subproperty_of(P, Super), + get_assoc(Super, Map0, Root), !, + debug(abstract(property), 'Mapped ~p --> ~p', [P, Root]), + put_assoc(P, Map0, Root, Map). +abstract_property(P, Map, P, Map). + + +%% assign_bagids(+Bags:list(bag), -IDBags:list(id-bag)). +% +% Assign bag identifiers to the each bag in Bags. + +assign_bagids(Bags, IDBags) :- + assign_bagids(Bags, 1, IDBags). + +assign_bagids([], _, []). +assign_bagids([H|T0], I, [Id-H|T]) :- + atom_concat('__bag_', I, Id), + I2 is I + 1, + assign_bagids(T0, I2, T). + + +%% make_rdf_graphs(+IDBags, -RDFBags) is det. +% +% Translate BagID-Members into an RDF graph. + +:- rdf_meta + statement(r,r,o,?,?). % statement//3 + +make_rdf_graphs(IDBags, RDFBags) :- + phrase(make_rdf_graphs(IDBags), RDFBags). + +make_rdf_graphs([]) --> + []. +make_rdf_graphs([ID-Members|T]) --> + statement(ID, rdf:type, rdf:'Bag'), + bag_members(Members, 0, ID), + make_rdf_graphs(T). + +bag_members([], _, _) --> + []. +bag_members([H|T], I, ID) --> + { I2 is I + 1, + atom_concat('_:', I, P) + }, + statement(ID, P, H), + bag_members(T, I2, ID). + +statement(S, P, O) --> + [ rdf(S, P, O) ]. + + + + /******************************* + * MERGE PROPERTIES * + *******************************/ + +%% merge_properties(+GraphIn, -GraphOut, +Options) is det. +% +% Merge equivalent properties joining the same nodes. They are +% replaced by their common ancestors. +% +% @param GraphIn List of rdf(S,P,O) +% @param GraphOut List of rdf(S,P,O) +% @param Options Option list (unused) + +merge_properties([], [], _). +merge_properties([rdf(S,P,O)|GraphIn], GraphOut, Options) :- + memberchk(rdf(S,_,O), GraphIn), !, + partition(same_so(S,O), GraphIn, Same, Rest), + maplist(pred, Same, Preds), + sort([P|Preds], UniquePreds), + common_ancestor_forest(sub_property_of, UniquePreds, Forest), + pairs_keys(Forest, Roots), + debug(abstract, 'Merged ~p --> ~p', [UniquePreds, Roots]), + mk_p_triples(Roots, S, O, GraphOut, Out2), + merge_properties(Rest, Out2, Options). +merge_properties([Triple|GraphIn], [Triple|GraphOut], Options) :- + merge_properties(GraphIn, GraphOut, Options). + +same_so(S, O, rdf(S, _, O)). +pred(rdf(_,P,_), P). + +mk_p_triples([], _, _) --> []. +mk_p_triples([P|T], S, O) --> + [rdf(S,P,O)], + mk_p_triples(T, S, O). + +sub_property_of(P, Super) :- + rdf_has(P, rdfs:subPropertyOf, Super). + + +%% common_ancestor_forest(:Pred, +Objects, -Forest) is det. +% +% Forest is a minimal set of minimal spanning trees with real +% branching (more than one child per node) covering all Objects. +% The partial ordering is defined by the non-deterministic goal +% call(Pred, +Node, -Parent). +% +% * Build up a graph represented as Node->Children and +% a list of roots. The initial list of roots is Objects. +% The graph is built using breath-first search to minimize +% depth. +% +% * Once we have all roots, we delete all branches that +% have only a single child. +% +% @param Forest is a list of trees. Each tree is represented +% as Root-Children, where Children is a possibly +% empty list if sub-trees. +% +% @tbd First prune dead-ends? +% +% == +% rdf_db:rdf_global_term([ulan:assisted_by, ulan:cousin_of], In), +% gtrace, +% rdf_abstract:common_ancestor_forest(sub_property_of, In, Out). +% == + +:- meta_predicate + common_ancestor_forest(2, +, -). + +common_ancestor_forest(Pred, Objects, Forest) :- + strip_module(Pred, M, P), + sort(Objects, Objects1), + keys_to_assoc(Objects1, target*[], Nodes0), + ancestor_tree(Objects1, M:P, Nodes0, Nodes, Roots), + prune_forest(Nodes, Roots, Forest), + debug(common_ancestor, 'Ancestors of ~p: ~p', [Objects1, Forest]). + +%% keys_to_assoc(+Keys:list, +Value, -Assoc) is det. +% +% True if Assoc is an assoc where each Key maps to Value. + +keys_to_assoc(Keys, Value, Assoc) :- + empty_assoc(Assoc0), + keys_to_assoc(Keys, Assoc0, Value, Assoc). + +keys_to_assoc([], Assoc, _, Assoc). +keys_to_assoc([H|T], Assoc0, Value, Assoc) :- + put_assoc(H, Assoc0, Value, Assoc1), + keys_to_assoc(T, Assoc1, Value, Assoc). + + +ancestor_tree(Objects, Pred, Nodes0, Nodes, Roots) :- + ancestor_tree(Objects, [], Objects, Pred, Nodes0, Nodes, Roots). + +%% ancestor_tree(+Open, +Closed, +Targets, :Pred, +NodesIn, +%% -NodesOut, -Roots) is det. +% +% Explore the ancestor graph one more step. This is the main loop +% looking for a spanning tree. We are done if +% +% * There is only one open node left and no closed ones. +% We found the single common root. +% +% * No open nodes are left. We have a set of closed roots +% which form our starting points. We still have to figure +% out the minimal set of these, as some of the trees may +% overlap others. +% +% * We have an open node covering all targets. This is the +% lowest one as we used breath-first expansion. This step +% is too expensive. + +ancestor_tree([One], [], _, _, Nodes, Nodes, [One]) :- !. +ancestor_tree([], Closed, _, _, Nodes, Nodes, Closed) :- !. +ancestor_tree(Open, _, Objects, _, Nodes, Nodes, [One]) :- + member(One, Open), + tree_covers(One, Nodes, Objects), !. +ancestor_tree(Open, Closed, Objects, Pred, Nodes0, Nodes, Roots) :- + expand_ancestor_tree(Open, NewOpen, NewClosed, Closed, Nodes0, Nodes1, Pred), + ancestor_tree(NewOpen, NewClosed, Objects, Pred, Nodes1, Nodes, Roots). + + +%% expand_ancestor_tree(+Open0, -Open, +%% +Closed0, -Closed, +%% +Nodes0, -Nodes, +%% :Pred) +% +% Expand the explored graph with one level. Open are the currently +% open nodes. Closed are the nodes that have no parent and +% therefore are roots. +% +% @param Nodes is an assoc R->(State*list(Child)) + +expand_ancestor_tree([], [], Closed, Closed, Nodes, Nodes, _). +expand_ancestor_tree([H|T], Open, Closed0, Closed, Nodes0, Nodes, Pred) :- + setof(Parent, call(Pred, H, Parent), Parents), !, + add_parents(Parents, H, Open, OpenT, Nodes0, Nodes1), + expand_ancestor_tree(T, OpenT, Closed0, Closed, Nodes1, Nodes, Pred). +expand_ancestor_tree([H|T], Open, [H|ClosedT], Closed, Nodes0, Nodes, Pred) :- + expand_ancestor_tree(T, Open, ClosedT, Closed, Nodes0, Nodes, Pred). + + +%% add_parents(+Parents:list, +Child, -NR, +NRT, +Nodes0, -Nodes) +% +% Add links Parent->Child to the tree Nodes0. The difference list +% NR\NRT contains Parents added new to the tree. + +add_parents([], _, NP, NP, Nodes, Nodes). +add_parents([H|T], Child, NP, NPT, Nodes0, Nodes) :- + in_tree(Child, H, Nodes0), !, + add_parents(T, Child, NP, NPT, Nodes0, Nodes). +add_parents([H|T], Child, NP, NPT, Nodes0, Nodes) :- + get_assoc(H, + Nodes0, State*Children, + Nodes1, State*[Child|Children]), !, + add_parents(T, Child, NP, NPT, Nodes1, Nodes). +add_parents([H|T], Child, [H|NP], NPT, Nodes0, Nodes) :- + put_assoc(H, Nodes0, node*[Child], Nodes1), + add_parents(T, Child, NP, NPT, Nodes1, Nodes). + + +%% in_tree(?Node, +Root, +Nodes) is nondet. +% +% True if Node appears in the tree below Root. + +in_tree(Node, Node, _). +in_tree(Node, Root, Nodes) :- + get_assoc(Root, Nodes, _State*Children), + member(Child, Children), + in_tree(Node, Child, Nodes). + + +%% prune_forest(+Nodes, +Roots, -MinimalForest) is det. +% +% MinimalForest is the minimal forest overlapping all targets. +% +% @tbd Currently doesn't remove unnecessary trees. + +prune_forest(Nodes, Roots, Forest) :- + maplist(prune_root(Nodes), Roots, Roots1), + sort(Roots1, Roots2), + maplist(prune_ancestor_tree(Nodes), Roots2, Forest0), + sort(Forest0, Forest). + +%% prune_root(+Nodes, +Root0, -Root) is det. +% +% Prune the parts of the search tree that ended up nowhere. The +% first real branch is where we find a solution or there are +% multiple parents. This avoids doing double work pruning the +% trees itself. + +prune_root(Nodes, Root0, Root) :- + get_assoc(Root0, Nodes, node*[One]), !, + prune_root(Nodes, One, Root). +prune_root(_, Root, Root). + +%% prune_ancestor_tree(Nodes, Root, Tree) is det. +% +% Tree is a pruned hierarchy from Root using the branching paths of +% Nodes. + +prune_ancestor_tree(Nodes, Root, Tree) :- + get_assoc(Root, Nodes, Value), + ( Value = node*[One] + -> prune_ancestor_tree(Nodes, One, Tree) + ; Tree = (Root-Children), + Value = _*Children0, + maplist(prune_ancestor_tree(Nodes), Children0, Children) + ). + +%% tree_covers(+Root, +Nodes, -Targets:list) is det. +% +% True if Targets is the sorted list of targets covered by the +% tree for which Root is the root. + +tree_covers(Root, Nodes, Targets) :- + phrase(tree_covers(Root, Nodes), Targets0), + sort(Targets0, Targets). + +tree_covers(Root, Nodes) --> + { get_assoc(Root, Nodes, State*Children) }, + ( {State == target} + -> [Root] + ; [] + ), + tree_covers_list(Children, Nodes). + +tree_covers_list([], _) --> + []. +tree_covers_list([H|T], Nodes) --> + tree_covers(H, Nodes), + tree_covers_list(T, Nodes). + + + /******************************* + * PRIMITIVES * + *******************************/ + +%% map_graph(+GraphIn, +Map:assoc, -GraphOut) is det. +% +% Map a graph to a new graph by mapping all fields of the RDF +% statements over Map. Then delete duplicates from the resulting +% graph as well as rdf(S,P,S) links that did not appear before the +% mapping. +% +% @tbd Should we look inside literals for mapped types? That +% would be consistent with abstract_graph/3. + +map_graph(GraphIn, Map, GraphOut) :- + phrase(map_triples(GraphIn, Map), Graph2), + sort(Graph2, GraphOut). + +map_triples([], _) --> + []. +map_triples([H0|T0], Map) --> + map_triple(H0, Map), + map_triples(T0, Map). + +map_triple(rdf(S0,P0,O0), Map) --> + { map_resource(S0, Map, S), + map_resource(P0, Map, P), + map_object(O0, Map, O) + }, + ( { S == O, S0 \== O0 } + -> [] + ; [ rdf(S,P,O) ] + ). + +map_resource(N0, Map, N) :- + get_assoc(N0, Map, N), !. +map_resource(N, _, N). + +map_object(O0, Map, O) :- + get_assoc(O0, Map, O), !. +map_object(literal(type(T0, V)), Map, L) :- + get_assoc(T0, Map, T), !, + L = literal(type(T, V)). +map_object(O, _, O). + + +%% map_graph(+GraphIn, +Map:assoc, -GraphOut, -AbstractMap) is det. +% +% Map a graph to a new graph by mapping all fields of the RDF +% statements over Map. The nodes in these graphs are terms of the +% form Abstract-list(concrete). +% +% @param AbstractMap assoc Abstract -> ordset(concrete) + +map_graph(GraphIn, Map, GraphOut, AbstractMap) :- + map_graph(GraphIn, Map, GraphOut), + assoc_to_list(Map, ConcAbstr), % Concrete->Abstract + graph_nodes(GraphIn, AllConcrete), + pairs_keys_intersection(ConcAbstr, AllConcrete, UsedConcAbstr), + transpose_pairs(UsedConcAbstr, AbstrConc), + group_pairs_by_key(AbstrConc, Grouped), + list_to_assoc(Grouped, AbstractMap). + + +%% pairs_keys_intersection(+Pairs, +Keys, -PairsInKeys) is det. +% +% True if PairsInKeys is a subset of Pairs whose key appear in +% Keys. Pairs must be key-sorted and Keys must be sorted. E.g. +% +% == +% ?- pairs_keys_intersection([a-1,b-2,c-3], [a,c], X). +% X = [a-1,c-3] +% == + +pairs_keys_intersection(Pairs, [K], Int) :- !, % One key: happens quite often + find_one_key(Pairs, K, Int). +pairs_keys_intersection([P1|TP], [K1|TK], Int) :- !, + compare_pair_key(Diff, P1, K1), + pairs_keys_isect(Diff, P1, TP, K1, TK, Int). +pairs_keys_intersection(_, _, []). + +pairs_keys_isect(<, _, [P1|TP], K1, TK, Int) :- !, + compare_pair_key(Diff, P1, K1), + pairs_keys_isect(Diff, P1, TP, K1, TK, Int). +pairs_keys_isect(=, P, [P1|TP], K1, TK, [P|Int]) :- !, + compare_pair_key(Diff, P1, K1), + pairs_keys_isect(Diff, P1, TP, K1, TK, Int). +pairs_keys_isect(>, P1, TP, _, [K1|TK], Int) :- !, + compare_pair_key(Diff, P1, K1), + pairs_keys_isect(Diff, P1, TP, K1, TK, Int). +pairs_keys_isect(=, P, _, _, _, [P]) :- !. +pairs_keys_isect(_, _, _, _, _, []). + +compare_pair_key(Order, K1-_, K2) :- !, + compare(Order, K1, K2). + +find_one_key([], _, []). +find_one_key([K0-V|T0], K, List) :- + ( K0 == K + -> List = [k0-V|T], + find_one_key(T0, K, T) + ; find_one_key(T0, K, List) + ). + + +%% map_to_bagged_graph(+GraphIn, +Map, -GraphOut, -Bags) is det. +% +% GraphOut is a graph between objects and bags, using the most +% specific common ancestor for representing properties. + +map_to_bagged_graph(GraphIn, Map, GraphOut, Bags) :- + map_graph(GraphIn, Map, AbstractGraph, AbstractMap), +% assertion(map_assoc(is_ordset, AbstractMap)), + empty_assoc(Nodes), + rdf_to_paired_graph(GraphIn, PairGraph), + phrase(bagify_triples(AbstractGraph, PairGraph, AbstractMap, + Nodes, Bags, []), + GraphOut). + +bagify_triples([], _, _, _, Bags, Bags) --> []. +bagify_triples([rdf(S0,_P,O0)|T], PairGraph, Map, Nodes, Bags, BagsT) --> + { bagify_resource(S0, S, Map, Nodes, Nodes1, Bags, BagsT0), + bagify_object(O0, O, Map, Nodes1, Nodes2, BagsT0, BagsT1), + + % normal properties + used_properties(S0, O0, PairGraph, Map, PList), + common_ancestor_forest(sub_property_of, PList, Forest), + debug(used_properties, 'Forest = ~p', [Forest]), + pairs_keys(Forest, PRoots), + % inverse properties + used_properties(O0, S0, PairGraph, Map, IPList), + common_ancestor_forest(sub_property_of, IPList, IForest), + debug(used_properties, 'IForest = ~p', [IForest]), + pairs_keys(IForest, IPRoots) + }, + mk_p_triples(PRoots, S, O), + mk_p_triples(IPRoots, O, S), + bagify_triples(T, PairGraph, Map, Nodes2, BagsT1, BagsT). + + +bagify_resource(R0, R, _Map, Nodes, Nodes) --> + { get_assoc(R0, Nodes, R) }, !. +bagify_resource(R0, BagID, Map, Nodes0, Nodes) --> + { get_assoc(R0, Map, Set), Set = [_,_|_], !, + atom_concat('__rbag_', R0, BagID), + put_assoc(R0, Nodes0, BagID, Nodes) + }, + make_rdf_graphs([BagID-Set]). +bagify_resource(R0, One, Map, Nodes, Nodes) --> + { get_assoc(R0, Map, [One]) }, !. +bagify_resource(R, R, _, Nodes, Nodes) --> []. + +bagify_object(R0, R, Map, Nodes0, Nodes) --> + bagify_resource(R0, R, Map, Nodes0, Nodes). + + +%% rdf_to_paired_graph(+GraphIn, -PairedGraph) is det. +% +% @param GraphIn Graph as list(rdf(S,P,O)) +% @param PairedGraph Graph as list(S-list(O-P)), where the +% pair lists are key-sorted, + +rdf_to_paired_graph(Triples, Pairs) :- + subject_pairs(Triples, Pairs0), + keysort(Pairs0, Pairs1), + group_pairs_by_key(Pairs1, Pairs2), + maplist(keysort_values, Pairs2, Pairs). + +subject_pairs([], []). +subject_pairs([rdf(S,P,O)|T0], [S-(O-P)|T]) :- + subject_pairs(T0, T). + +keysort_values(K-V0, K-V) :- + keysort(V0, V). + + +%% used_properties(+S0, +O0, +GraphIn, +AbstractMap, -PredList) is det. +% +% Find properties actually used between two bags. S0 and O0 are +% the subject and object from the abstract graph. +% +% @param GraphIn original concrete graph represented as pairs. +% See rdf_to_paired_graph/2. +% @param AbstractMap Assoc Abstract->Concrete, where Concrete is +% an ordset of resources. + +used_properties(S0, O0, GraphIn, Map, PList) :- + get_assoc(S0, Map, SList), + get_assoc(O0, Map, OList), + pairs_keys_intersection(GraphIn, SList, Intersection), + pairs_values(Intersection, OPList0), + append(OPList0, OPList1), + keysort(OPList1, OPList), + pairs_keys_intersection(OPList, OList, IntPList), + pairs_values(IntPList, PListDupl), + sort(PListDupl, PList), + debug(used_properties, ' --> ~p', [PList]). + + +%% graph_resources(+Graph, -Resources:list(atom)) is det. +% +% Resources is a sorted list of unique resources appearing in +% Graph. All resources are in Resources, regardless of the role +% played in the graph: node, edge (predicate) or type for a typed +% literal. +% +% @see graph_resources/4 distinguishes the role of the resources. + +graph_resources(Graph, Resources) :- + graph_resources(Graph, R, P, P, T, T, [], _, _), + sort(R, Resources). + +%% graph_nodes(+Graph, -Nodes) is det. +% +% Nodes is a sorted list of all resources and literals appearing +% in Graph. +% +% @tbd Better name + +graph_nodes(Graph, Nodes) :- + graph_resources(Graph, Nodes0, P, P, L, _, _, L, []), + sort(Nodes0, Nodes). + + +%% graph_resources(+Graph, +%% -Resources:list(atom), +%% -Predicates:list(atom), +%% -Types:list(atom)) is det. +% +% Resources is a sorted list of unique resources appearing in +% Graph as subject or object of a triple. Predicates is a list of +% all unique predicates in Graph and Types is a list of all unique +% literal types in Graph. + +graph_resources(Graph, Resources, Preds, Types) :- + graph_resources(Graph, R, [], P, [], T, [], _, _), + sort(R, Resources), + sort(P, Preds), + sort(T, Types). + +graph_resources([], R, R, P, P, T, T, L, L). +graph_resources([rdf(S,P,O)|T], [S|RT0], RT, [P|PTl0], PTl, Tl0, Tl, L0, L) :- + object_resources(O, RT0, RT1, Tl0, Tl1, L0, L1), + graph_resources(T, RT1, RT, PTl0, PTl, Tl1, Tl, L1, L). + + +object_resources(O, R0, R, T0, T, L0, L) :- + ( atom(O) + -> R0 = [O|R], T0 = T, L0 = L + ; O = literal(Val) + -> R0 = R, L0 = [O|L], + ( Val = type(Type, _) + -> T0 = [Type|T] + ; T0 = T + ) + ; assertion(fail) + ). + + + /******************************* + * ABSTRACT * + *******************************/ + +%% abstract_graph(+GraphIn, -GraphOut, +Options) is det. +% +% Unify GraphOut with an abstracted version of GraphIn. The +% abstraction is carried out triple-by-triple. Note there is no +% need to abstract all triples to the same level. We do however +% need to map nodes in the graph consistently. I.e. if we abstract +% the object of rdf(s,p,o), we must abstract the subject of rdf(o, +% p2, o2) to the same resource. +% +% If we want to do incremental growing we must keep track which +% nodes where mapped to which resources. Option? +% +% We must also decide on the abstraction level for a node. This +% can be based on the weight in the search graph, the involved +% properties and focus such as location and time. Should we +% express this focus in the weight? +% +% Options: +% +% * map_in(?Map) +% If present, this is the initial resource abstraction map. +% * map_out(-Map) +% Provide access to the final resource abstraction map. +% * bags(-Bags) +% If provided, _bagify_ the graph, returning the triples that +% define the bags in Bags. The full graph is created by +% appending Bags to GraphOut. +% * merge_concepts_with_super(+Boolean) +% If =true= (default), merge nodes of one is a super-concept +% of another. + +abstract_graph(GraphIn, GraphOut, Options) :- + map_in(Options, MapIn), + graph_resources(GraphIn, Nodes, NT, Edges, [], _T0, _TT, NT, []), + node_map(Nodes, MapIn, Map2, Options), + edge_map(Edges, Map2, MapOut), + map_out(Options, MapOut), + ( option(bags(Bags), Options) + -> map_to_bagged_graph(GraphIn, MapOut, GraphOut, Bags) + ; map_graph(GraphIn, MapOut, GraphOut) + ). + +map_in(Options, Map) :- + option(map_in(Map), Options, Map), + (var(Map) -> empty_assoc(Map) ; true). + +map_out(Options, Map) :- + option(map_out(Map), Options, _). + +%% node_map(+Nodes, +Map0, -Map, +Options) is det. +% +% Create the abstraction map for the nodes of the graph. It +% consists of two steps: +% +% 1. Map all instances to their class, except for concepts +% 2. If some instances are mapped to class A and others to +% class B, where A is a super-class of B, map all instances +% to class A. + +node_map(Nodes, Map0, Map, Options) :- + concepts_of(Nodes, Map0, Map1, _NewConcepts), + ( option(merge_concepts_with_super(true), Options, true) + -> assoc_to_values(Map1, Concepts), + sort(Concepts, Unique), + identity_map(Unique, SuperMap0), + find_broaders(Unique, SuperMap0, SuperMap1), + deref_map(SuperMap1, SuperMap), + map_assoc(map_over(SuperMap), Map1, Map) + ; Map = Map1 + ). + +map_over(Map, V0, V) :- + ( get_assoc(V0, Map, V1) + -> V = V1 + ; V = V0 + ). + +concepts_of([], Map, Map, []). +concepts_of([R|T], Map0, Map, New) :- + get_assoc(R, Map0, _), !, + concepts_of(T, Map0, Map, New). +concepts_of([R|T], Map0, Map, [C|New]) :- + concept_of(R, C), + put_assoc(R, Map0, C, Map1), + concepts_of(T, Map1, Map, New). + +%% identity_map(+List, -Map) is det. +%% find_broaders(+List, +Map0, -Map) is det. +%% deref_map(+Map0, -Map) is det. + +identity_map(List, Map) :- + map_list_to_pairs(=, List, Pairs), + list_to_assoc(Pairs, Map). + +find_broaders([], Map, Map). +find_broaders([C|T], Map0, Map) :- + broader(C, Super), + get_assoc(Super, Map0, SuperSuper), !, + debug(rdf_abstract, 'Mapped ~p to super concept ~p', [C, SuperSuper]), + put_assoc(C, Map0, SuperSuper, Map1), + find_broaders(T, Map1, Map). +find_broaders([_|T], Map0, Map) :- + find_broaders(T, Map0, Map). + + +deref_map(Map0, Map) :- + findall(KV, mapped_kv(KV, Map0), Pairs), + deref(Pairs, NewPairs), + list_to_assoc(NewPairs, Map). + +mapped_kv(K-V, Assoc) :- + gen_assoc(K, Assoc, V), + K \== V. + +%% deref(+Pairs0, NewPairs) is det. +% +% Dereference chains V1-V2, V2-V3 into V1-V3, V2-V3. Note that +% Pairs0 may contain cycles, in which case all the members of the +% cycle are replaced by the representative as defined by +% rdf_representative/2. + +deref(Pairs, NewPairs) :- + list_to_assoc(Pairs, Assoc), + deref(Pairs, Assoc, NewPairs). + +deref([], _, []). +deref([K-V0|T0], Map, [K-V|T]) :- + deref2(V0, Map, [V0], EqSet, V), + ( EqSet == [] + -> deref(T0, Map, T) + ; rdf_representative(EqSet, V), + deref_cycle(T0, EqSet, V, Cycle, T1), + append(Cycle, T2, T), + deref(T1, Map, T2) + ). + +deref2(V0, Map, Visited, EqSet, V) :- + get_assoc(V0, Map, V1), !, + ( memberchk(V1, Visited) + -> EqSet = Visited + ; deref2(V1, Map, [V1|Visited], EqSet, V) + ). +deref2(V, _, _, [], V). + +deref_cycle([], _, _, [], []). +deref_cycle([K-V0|T0], EqSet, V, [K-V|CT], Rest) :- + memberchk(V0, EqSet), !, + deref_cycle(T0, EqSet, V, CT, Rest). +deref_cycle([H|T0], EqSet, V, CT, [H|RT]) :- + deref_cycle(T0, EqSet, V, CT, RT). + + +%% edge_map(+Edges, +MapIn, -MapOut) is det. + +edge_map([], Map, Map). +edge_map([R|T], Map0, Map) :- + get_assoc(R, Map0, _), !, + edge_map(T, Map0, Map). +edge_map([R|T], Map0, Map) :- + iface_abstract_predicate(R, C), + put_assoc(R, Map0, C, Map1), + edge_map(T, Map1, Map). + +%% concept_of(+Resource, -Concept) is det. +% +% True if Concept is the concept Resource belongs to. If Resource +% is a concept itself, Concept is Resource. +% +% @tbd Make thesaurus concept classes a subclass of skos:Class. +% @tbd Put in a reusable place, merge with kwd_search.pl + +concept_of(O, O) :- + iface_concept(O), !. +concept_of(O, C) :- + class_of(O, C). + +%% broader(+Term, -Broader) is nondet. +% +% True if Broader is a broader term according to the SKOS schema. +% +% @tbd Deal with owl:sameAs (and skos:exactMatch) + +broader(Term, Broader) :- + rdf_reachable(Term, skos:broader, Broader), + Broader \== Term. diff --git a/lib/cluster_search/rdf_backward_search.pl b/lib/cluster_search/rdf_backward_search.pl new file mode 100644 index 0000000..a7d8cb2 --- /dev/null +++ b/lib/cluster_search/rdf_backward_search.pl @@ -0,0 +1,147 @@ +/* This file is part of ClioPatria. + + Author: + HTTP: http://e-culture.multimedian.nl/ + GITWEB: http://gollem.science.uva.nl/git/ClioPatria.git + GIT: git://gollem.science.uva.nl/home/git/ClioPatria.git + GIT: http://gollem.science.uva.nl/home/git/ClioPatria.git + Copyright: 2007, E-Culture/MultimediaN + + ClioPatria is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + ClioPatria is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with ClioPatria. If not, see <http://www.gnu.org/licenses/>. +*/ + +:- module(rdf_backward_search, + [ rdf_backward_search/4 % +KeyWord, +TargetCond, -State, +Options + ]). + +:- use_module(rdf_search). +:- use_module(rdf_graph). +:- use_module(library(assoc)). +:- use_module(library(url)). +:- use_module(library(pairs)). +:- use_module(library(lists)). +:- use_module(library(debug)). +:- use_module(library(option)). +:- use_module(library(semweb/rdf_db)). +:- use_module(library(semweb/rdfs)). +:- use_module(owl_ultra_lite). + +/** <module> Direct metadata search on RDF graph + +@author Michiel Hildebrand, on top of search facilities from rdf_search.pl +*/ + +:- meta_predicate + rdf_backward_search(+, 1, -, +). + +%% rdf_backward_search(+Keyword, :TargetCond, -State, +Options) +% +% Initiate a graph search by traversing resources in backwards fashion, +% thus only considing the triple where the current node is an object. +% +% Options: see rdf_search/4 + +rdf_backward_search(Keyword, TargetCond, State, Options) :- + Expand = rdf_backward_search:edge, + setting(search:steps, DefSteps), + option(steps(Steps0), Options, DefSteps), + ( Steps0 == 0 + -> Steps = -1 + ; Steps = Steps0 + ), + rdf_keyword_search(Keyword, TargetCond, State, + [expand_node(Expand)|Options]), + steps(0, Steps, State). + +steps(Steps, Steps, _) :- !. +steps(I, Steps, Graph) :- + I2 is I + 1, + ( rdf_extend_search(Graph) + -> ( debugging(rdf_search) + -> debug(rdf_search, 'After cycle ~D', [I2]), + forall(debug_property(P), + ( rdf_search_property(Graph, P), + debug(rdf_search, '\t~p', [P]))) + ; true + ), + steps(I2, Steps, Graph) + ; debug(rdf_search, 'Agenda is empty after ~D steps~n', [I]) + ). + +debug_property(target_count(_)). +debug_property(graph_size(_)). + + +%% edge(+Node, +Score, -Link) is nondet. +% +% Generate links from Node. + +edge(O, _, i(S,P,W)) :- + edge(O, S, P, W), + debug(myedge, 'Expanding ~2f ~p ~p ~p~n', [W, O, P, S]), + W > 0.0001. + +edge(O, S, P, W) :- + setof(S, i_edge(O, S, P), Ss), + ( predicate_weight(P, W) + -> member(S, Ss) + ; length(Ss, Len), + member(S, Ss), + subject_weight(S, Len, W) + ). + +i_edge(O, S, P) :- + rdf(S, P, O). +i_edge(O, S, P) :- + rdf(O, P0, S), + atom(S), + ( owl_ultra_lite:inverse_predicate(P0, P) + -> true + ; predicate_weight(P0, 1) + -> P = P0 + ). + +%% predicate_weight(+Predicate, -Weight) is semidet. +% +% Weight based on the meaning of Predicate. This predicate deals +% with RDF predicates that have a well defined meaning. +% +% Additional weights (or overwrites) can be defined in +% cliopatria:predicate_weight/2, +% +% Note that rdfs:comment is not searched as it is supposed to +% be comment about the graph, and not part of the graph itself. + +/* +predicate_weight(P, Weight) :- + catch(cliopatria:predicate_weight(P, Weight), _, fail), !. +*/ +predicate_weight(P, 1) :- + rdfs_subproperty_of(P, rdfs:label), !. +predicate_weight(P, 1) :- + rdfs_subproperty_of(P, rdf:value), !. +predicate_weight(P, 1) :- + rdf_equal(P, owl:sameAs), !. +predicate_weight(P, 1) :- + rdf_equal(P, skos:exactMatch), !. +predicate_weight(P, 0) :- + rdfs_subproperty_of(P, rdfs:comment), !. +/* +predicate_weight(_, Weight) :- + catch(cliopatria:default_weight(Weight), _, fail). +*/ +subject_weight(S, _, 1) :- + rdf_is_bnode(S), !. +subject_weight(_, Count, W) :- + W is 1/max(3, Count). diff --git a/lib/cluster_search/rdf_cluster.pl b/lib/cluster_search/rdf_cluster.pl new file mode 100644 index 0000000..1c672c0 --- /dev/null +++ b/lib/cluster_search/rdf_cluster.pl @@ -0,0 +1,448 @@ +/* This file is part of ClioPatria. + + Author: + HTTP: http://e-culture.multimedian.nl/ + GITWEB: http://gollem.science.uva.nl/git/ClioPatria.git + GIT: git://gollem.science.uva.nl/home/git/ClioPatria.git + GIT: http://gollem.science.uva.nl/home/git/ClioPatria.git + Copyright: 2007, E-Culture/MultimediaN + + ClioPatria is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + ClioPatria is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with ClioPatria. If not, see <http://www.gnu.org/licenses/>. +*/ + +:- module(rdf_cluster, + [ search_path/3, % +Target, +Graph, -Path + search_path/4, % +Method, +Target, +Graph, -Path + cached_search_path/6, % +Method, +Target, +Graph, + % +CacheIn, -CacheOut, -Path + empty_path_cache/1, % -Cache + graph_path/5, % +Target, +Graph, +CacheIn -CacheOut, -Path + schema_path/2, % +Path, -SchemaPath + predicate_schema_path/2, % +Path, -SchemaPath + partial_schema_path/2, % +Path, -PartialSchemaPath + canonical_path/2, % +Path, -CanonicalPath + strip_alignment/2, % +Path, -StrippedPath + strip_rdf_value/2, + direct_path/2 + ]). + + +:- use_module(library(semweb/rdf_db)). +:- use_module(library(semweb/rdfs)). +:- use_module(library(semweb/rdf_label)). +:- use_module(library(assoc)). +:- use_module(library(rbtrees)). +:- use_module(library(lists)). +:- use_module(library(debug)). +:- use_module(rdf_graph). +:- use_module(rdf_search). +:- use_module(graph_search). +:- use_module(library(semweb/rdf_abstract)). + + +%% canonical_path(+PathIn, -PathOut) +% +% Reduce schema path to a canonical form. + +canonical_path(PathIn, PathOut) :- + strip_rdf_value(PathIn, Path), + merge_transitive(Path, Path1), + merge_related(Path1, PathOut). + +%% directh_path(+PathIn, -PathOut) +% +% Reduce path to a direct path. + +direct_path([S,P,O], [S,P,O]) :- !. +direct_path([S,P,O,Prop,L], [S,P,O,Prop,L]) :- + rdfs_subproperty_of(Prop,rdfs:label), !. +direct_path(_, other). + + +%% strip_alignment(+IPathIn, -IPathOut) is det. +% +% Strip alignment relations (owl:sameAs and skos:exactMatch) +% from a path. + +strip_alignment([], []). +strip_alignment([One], [One]) :- !. +/* +strip_alignment([_O1, P, O2|T0], T) :- + alignment_predicate(P), !, + instance_search:representative(O2, Represent), + strip_alignment([Represent|T0], T). +*/ +strip_alignment([O,P|T0], [O,P|T]) :- + strip_alignment(T0, T). + +term_expansion(alignment_predicate(P0), + alignment_predicate(P)) :- + rdf_global_id(P0, P). + +alignment_predicate(owl:sameAs). +alignment_predicate(skos:exactMatch). + + +%% merge_transitive(+PathIn, -PathOut) is det. +% +% Merge transitive relations from a path. + +merge_transitive([One], [One]). +merge_transitive([O1, P, _, P, O3|T0], T) :- + transitive_predicate(P), !, + merge_transitive([O1,P,O3|T0], T). +merge_transitive([O,P|T0], [O,P|T]) :- + merge_transitive(T0, T). + +/* +transitive_predicate(P) :- + rdfs_subproperty_of(P, iface:broader), !. +transitive_predicate(P) :- + rdfs_subproperty_of(P, iface:narrower), !. +*/ +transitive_predicate(P) :- + rdfs_individual_of(P, owl:'TransitiveProperty'), !. + + +%% merge_repetition(+PathIn, -PathOut) is det. +% +% Merge repetitive relations in a path. + +merge_repetition([], []). +merge_repetition([One], [One]). +merge_repetition([O1, P, _, P, O3|T0], [O1,seq(P)|T]) :- !, + more_repetition(T0, P, O3, T1, O), + merge_repetition([O,T1], T). +merge_repetition([O,P|T0], [O,P|T]) :- + merge_repetition(T0, T). + +more_repetition([P,O1|T], P, _, Rest, O) :- !, + more_repetition(T, P, O1, Rest, O). +more_repetition(T, _, O, T, O). + +%% merge_related(+PathIn, -PathOut) is det. +% +% Merge related relations from a path. + +merge_related([One], [One]). +/* +merge_related([O1, P1, _, P2, O3|T0], T) :- + iface_related(P1), + iface_related(P2), !, + rdf_equal(P, iface:related), + merge_related([O1,P,O3|T0], T). +*/ +merge_related([O,P|T0], [O,P|T]) :- + merge_related(T0, T). + +/* +iface_related(P) :- + rdfs_subproperty_of(P, iface:broader). +iface_related(P) :- + rdfs_subproperty_of(P, iface:narrower). +iface_related(P) :- + rdfs_subproperty_of(P, iface:related). +*/ + + +%% remove_label(+PathIn, -PathOut) +% +% Remove matching label part at the end of the Path. + +remove_label([S,P,O], [S,P,O]) :- !. +remove_label(PathIn, PathOut) :- + reverse(PathIn, [_,P|Rest]), + rdfs_subproperty_of(P, rdfs:label), + reverse(Rest, PathOut). + +%% strip_rdf_value(+PathIn, -PathOut) +% +% Remove rdf:value construction from the Path. + +strip_rdf_value([One], [One]) :- !. +strip_rdf_value([S,P,_,R,V|T], [S,P|Rest]) :- + rdf_equal(R, rdf:value), !, + strip_rdf_value([V|T], Rest). +strip_rdf_value([O,P|T0], [O,P|T]) :- + strip_rdf_value(T0, T). + + + +%% search_path(+TargetNode, +Graph, -Path) is det. +%% search_path(+Method, +TargetNode, +Graph, -Path) is det. +% +% Path connect TargetNode to a Node with type =start=. Note that +% if the search started with a target, the node-type is =target= +% and we return an empty Path. +% +% @param Path is list with alternating property and value, +% ended in the start-node (typically a literal). +% Path starts with TargetNode. +% @param Method one of =best= or =shortest= + +search_path(Target, Graph, Path) :- + search_path(best, Target, Graph, Path). + +search_path(best, Target, Graph, Path) :- + best_first_search(Target, Graph, -, _, Path), + !. +search_path(breadth, Target, Graph, Path) :- + breadth_first_search(Target, Graph, -, _, Path), + !. +search_path(_, Target, _, [Target]). % search started with target + +%% cached_search_path(+Method, +TargetNode, +Graph, +%% +CacheIn, -CacheOut, -Path) is det. +% +% Version of search_path/4 that maintains a cache of search +% results, to avoid searching the whole graph for each result. The +% cache is updated on each result found. Typically it is used in a +% loop to obtain the paths for a list of targets. +% +% @see empty_path_cache/1 to create the initial cache. + +cached_search_path(best, Target, Graph, CacheIn, CacheOut, Path) :- + best_first_search(Target, Graph, CacheIn, CacheOut, Path). +cached_search_path(breadth, Target, Graph, CacheIn, CacheOut, Path) :- + breadth_first_search(Target, Graph, CacheIn, CacheOut, Path). +%cached_search_path(_, Target, _, Cache, Cache, [Target]). + +%% empty_path_cache(-Cache) +% +% Produce initial cache for cached_search_path/6. + +empty_path_cache(Cache) :- + empty_assoc(Cache). + + +%% best_first_search(+Target, +Graph, +%% +CacheIn, -CacheOut, -Path) is nondet. +% +% Execute a best-first search through the graph back from the +% Target to a start node. +% +% @param CacheIn is an assoc used to cache previous results. Pass +% the atom '-' to disable caching. + +best_first_search(Target, Graph, CacheIn, CacheOut, Path) :- + empty_assoc(Assoc0), + put_assoc(Target, Assoc0, true, Done), + best_first_search([Target], Graph, CacheIn, CacheOut, Done, Path). + +best_first_search([AH|_Agenda], _Graph, CacheIn, CacheOut, _Done, Path) :- + CacheIn \== (-), + agenda_head(AH, R, Path0), + get_assoc(R, CacheIn, RestPath), !, + debug(path, 'Cache hit ~p --> ~p', [R, RestPath]), + reverse(Path0, Path1), + append(Path1, RestPath, Path), + update_cache(Path, CacheIn, CacheOut). +best_first_search([AH|_Agenda], Graph, CacheIn, CacheOut, _Done, Path) :- + agenda_head(AH, R, Path0), + search_graph_node_type(Graph, R, start), !, + reverse(Path0, Path), + update_cache(Path, CacheIn, CacheOut). +best_first_search([AH|Agenda], Graph, CacheIn, CacheOut, Done, Path) :- + agenda_head(AH, R, Path0), + put_assoc(R, Done, true, Done2), + findall(n(Weight, [V,P|Path0]), + ( search_graph_rdf(Graph, R, P, V), + \+ get_assoc(V, Done2, _), + step_weight(P, V, Graph, Weight) + ), + Steps0 + ), + append(Steps0, Agenda, Agenda1), + sort(Agenda1, Agenda2), + reverse(Agenda2, NewAgenda), + best_first_search(NewAgenda, Graph, CacheIn, CacheOut, Done2, Path). + + +update_cache(_, -, -) :- !. % not cached +update_cache(Path, CacheIn, CacheOut) :- + debug(path, 'Adding path ~p', [Path]), + path_into_cache(Path, CacheIn, CacheOut). + +path_into_cache([], Cache, Cache). +path_into_cache(Path, CacheIn, CacheOut) :- + Path = [R|RPath], + ( get_assoc(R, CacheIn, _) + -> CacheOut = CacheIn + ; put_assoc(R, CacheIn, RPath, CacheTmp), + ( RPath = [_P|Rest] + -> path_into_cache(Rest, CacheTmp, CacheOut) + ; CacheOut = CacheTmp + ) + ). + + +%% breadth_first_search(+Target, +Graph, +%% +CacheIn, -CacheOut, -Path) is nondet. +% +% Execute a breath first search through the search graph, finding +% the shortest path from a target back to a start node. + +breadth_first_search(Target, Graph, CacheIn, CacheOut, Path) :- + empty_assoc(Assoc0), + put_assoc(Target, Assoc0, true, Done), + breadth_first_search([Target|Tail], Tail, Graph, + CacheIn, CacheOut, Done, Path). + +breadth_first_search(Agenda, ATail, _, _, _, _, _) :- + Agenda == ATail, !, fail. +breadth_first_search([AH|_Agenda], _, _, CacheIn, CacheOut, _, Path) :- + agenda_head(AH, R, Path0), + get_assoc(R, CacheIn, RestPath), !, + debug(path, 'Cache hit ~p --> ~p', [R, RestPath]), + reverse(Path0, Path1), + append(Path1, RestPath, Path), + update_cache(Path, CacheIn, CacheOut). +breadth_first_search([AH|_Agenda], _, Graph, CacheIn, CacheOut, _, Path) :- + agenda_head(AH, R, Path0), + search_graph_node_type(Graph, R, start), + reverse(Path0, Path), + update_cache(Path, CacheIn, CacheOut). +breadth_first_search([AH|Agenda], ATail, Graph, + CacheIn, CacheOut, Done, Path) :- + agenda_head(AH, R, Path0), + put_assoc(R, Done, true, Done2), + findall(n(W, [V,P2|Path0]), + ( search_graph_rdf(Graph, R, P2, V), % FIXME + \+ get_assoc(V, Done2, true), + step_weight(P2, V, Graph, W) + ), + Steps0), + sort(Steps0, Steps1), + reverse(Steps1, Steps), % heighest weight first + append(Steps, NewTail, ATail), + breadth_first_search(Agenda, NewTail, Graph, + CacheIn, CacheOut, Done2, Path). + +agenda_head(n(_,Path), R, Path) :- !, + Path = [R,_P|_]. +agenda_head(R, R, [R]). + + +step_weight(P, _V, _Graph, Weight) :- + graph_search:predicate_weight(P, Weight), !. +step_weight(_, _V, _Graph, 0.3). + + +%% enumarate_path(+Target, +Graph, +%% +CacheIn, -CacheOut, -Path) is nondet. +% +% Execute a best-first search through the graph back from the +% Target to a start node. +% +% @param CacheIn is an assoc used to cache previous results. Pass +% the atom '-' to disable caching. + +graph_path(Target, Graph, CacheIn, CacheOut, Path) :- + empty_assoc(Assoc0), + put_assoc(Target, Assoc0, true, Done), + graph_path([Target], Graph, CacheIn, CacheOut, Done, Path). + +graph_path([AH|_Agenda], _Graph, CacheIn, CacheOut, _Done, Path) :- + CacheIn \== (-), + path_head(AH, R, Path0), + get_assoc(R, CacheIn, RestPath), + debug(path, 'Cache hit ~p --> ~p', [R, RestPath]), + reverse(Path0, Path1), + append(Path1, RestPath, Path), + update_cache(Path, CacheIn, CacheOut). +graph_path([AH|_Agenda], Graph, CacheIn, CacheOut, _Done, Path) :- + path_head(AH, R, Path0), + search_graph_node_type(Graph, R, start), + reverse(Path0, Path), + update_cache(Path, CacheIn, CacheOut). +graph_path([AH|Agenda], Graph, CacheIn, CacheOut, Done, Path) :- + path_head(AH, R, Path0), + put_assoc(R, Done, true, Done2), + findall([V,P|Path0], + ( search_graph_rdf(Graph, R, P, V), + \+ get_assoc(V, Done2, _) + ), + Steps0 + ), + Steps0 \== [], + append(Steps0, Agenda, Agenda1), + sort(Agenda1, Agenda2), + reverse(Agenda2, NewAgenda), + graph_path(NewAgenda, Graph, CacheIn, CacheOut, Done2, Path). + +path_head([R|Path], R, [R|Path]) :- !. +path_head(R, R, [R]). + +%% schema_path(+InstancePath, -SchemaPath) is det. +% +% Abstract a path by lifting all resources to their class/concept +% and all predicates to their interface properties. +% +% @param InstancePath is a list of alternating nodes and edges +% (predicates) that starts with the origin of the search +% and end with the target (e.g. artwork). +% @param SchemaPath is a list of the same length as InstancePath +% holding the abstracted path. +% @see Keep in sync with kwd_search:predicate_path/2 from old +% basic search. + +schema_path([], []). +schema_path([V], [Class]) :- !, + ( V = literal(_) + -> rdf_equal(Class, rdfs:'Literal') + ; Class = V % keep the URI + ). +/* schema_path([V, P|T], [Class, AP|Rest]) :- + iface_abstract_class(V, Class), + iface_abstract_predicate(P, AP), + schema_path(T, Rest). +*/ + +predicate_schema_path([], []). +predicate_schema_path([V], [V]). +/* predicate_schema_path([V, P|T], [V, AP|Rest]) :- + iface_abstract_predicate(P, AP), + predicate_schema_path(T, Rest). +*/ + + +direct_schema_path([], []). +direct_schema_path([V], [L]) :- !, + ( V = literal(_) + -> L='query' + ; rdf_label(V,L) + ). +/* direct_schema_path([V, P|T], [Class, AP|Rest]) :- + iface_abstract_class(V, Class), + iface_abstract_predicate(P, AP), + direct_schema_path(T, Rest). +*/ + + +%% partial_schema_path(+InstancePath, -SchemaPath) is det. +% +% Abstract a path by lifting those resources for which an +% abstraction is defined. + +partial_schema_path([], []). +partial_schema_path([R|T], [Abstract|Rest]) :- + ( R = literal(_) + -> rdf_equal(Abstract, rdfs:'Literal') + ; catch(cliopatria:abstract_class(R, Abstract),_,fail) + -> true + ; Abstract = R + ), + partial_schema_path(T, Rest). + +cliopatria:abstract_class(A,A). diff --git a/lib/cluster_search/rdf_direct_search.pl b/lib/cluster_search/rdf_direct_search.pl new file mode 100644 index 0000000..57dea76 --- /dev/null +++ b/lib/cluster_search/rdf_direct_search.pl @@ -0,0 +1,178 @@ +/* This file is part of ClioPatria. + + Author: + HTTP: http://e-culture.multimedian.nl/ + GITWEB: http://gollem.science.uva.nl/git/ClioPatria.git + GIT: git://gollem.science.uva.nl/home/git/ClioPatria.git + GIT: http://gollem.science.uva.nl/home/git/ClioPatria.git + Copyright: 2007, E-Culture/MultimediaN + + ClioPatria is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + ClioPatria is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with ClioPatria. If not, see <http://www.gnu.org/licenses/>. +*/ + +:- module(rdf_direct_search, + [ rdf_concept_search/4, % +KeyWord, +TargetCond, -State, +Options + rdf_literal_search/4 % +KeyWord, +TargetCond, -State, +Options + ]). + +:- use_module(rdf_search). +:- use_module(rdf_graph). +:- use_module(library(record)). +:- use_module(library(assoc)). +:- use_module(library(url)). +:- use_module(library(pairs)). +:- use_module(library(lists)). +:- use_module(library(debug)). +:- use_module(library(option)). +:- use_module(library(semweb/rdf_db)). +:- use_module(library(semweb/rdfs)). +:- use_module(filter). + +/** <module> Direct metadata search on RDF graph + +@author Michiel Hildebrand, on top of search facilities from rdf_search.pl +*/ + +:- meta_predicate + rdf_direct_search(+, 1, -, +). + + +:- record + expand(description = false, + initial_eq = false, + equivalent = false, + narrower = false, + narrower1 = false, + broader = false, + related = false, + specific = false, + rdf_value = true, + forward = false + ). + +%% rdf_direct_search(+Keyword, :TargetCond, -State, +Options) +% +% Initiate a graph search by traversing only direct links between +% keyword and target resources. +% +% Options: see rdf_search/4 + +rdf_concept_search(Keyword, TargetCond, State, Options) :- + make_expand([], Expand0), + set_expand_options(Expand0, Expand, Options), + Edge = rdf_direct_search:edge(Expand, TargetCond), + rdf_keyword_search(Keyword, TargetCond, State, + [expand_node(Edge),target_expand(false)|Options]), + steps(State). + +rdf_literal_search(Keyword, TargetCond, State, Options) :- + Expand = rdf_direct_search:literal_edge, + rdf_keyword_search(Keyword, TargetCond, State, + [expand_node(Expand),target_expand(false)|Options]), + steps(State). + +set_expand_options(Expand0, Expand, Options) :- + option(expand(Es0), Options, []), + maplist(atom_to_option, Es0, Es), + set_expand_fields(Es, Expand0, Expand, _). + +atom_to_option(H, Opt) :- + atom(H), + Opt =.. [H,true]. + +%% steps(+State) +% +% Recursively call rdf_extend_search/1 until it fails. + +steps(State) :- + rdf_extend_search(State), !, + ( debugging(rdf_search) + -> forall(debug_property(P), + ( rdf_search_property(State, P), + debug(rdf_search, '\t~p', [P]))) + ; true + ), + steps(State). +steps(_). + +debug_property(target_count(_)). +debug_property(graph_size(_)). + +%% edge(+Expand, +Cond, +Graph, +Object, -Link) is nondet. +% +% Default predicate to generate edges. + +edge(Expand, Cond, O, Score, i(S,P,W)) :- + setof(S, edge_i(O, S, P), Ss), + ( O = literal(_) + -> ( rdfs_subproperty_of(P, rdfs:label) + -> W = 1 + ; expand_description(Expand, true) + -> W = 0.5 + ), + member(S, Ss) + ; cond_predicate_weight(P, Expand, Score, W) + -> member(S, Ss) + ; W = 0.5, + member(S, Ss), + call(Cond, S) + ). +edge(Expand, _Cond, O, Score, i(S,P,W)) :- + expand_forward(Expand, true), + setof(S, edge_i(S, O, P), Ss), + ( cond_f_predicate_weight(P, Expand, Score, W) + -> member(S, Ss) + ). + +edge_i(O, S, P) :- + rdf(S, P, O). +edge_i(O, S, P) :- + rdf(O, P0, S), + rdf_search:inverse_property(P, P0). + +%% cond_predicate_weight(+Pred, +ExpandState, +Score, -Weight) +% +% Weight is the weight of Pred if ExpandState and Score allow. + +cond_predicate_weight(P, Expand, _Score, 1) :- + rdf_equal(P, rdf:value), !, + expand_rdf_value(Expand, true). +cond_predicate_weight(P, Expand, Score, 1) :- + eq_predicate(P), !, + ( expand_equivalent(Expand, true) + -> true + ; Score > 0.9, + expand_initial_eq(Expand, true) + ). +cond_predicate_weight(_P, Expand, Score, 0.5) :- + Score > 0.5, !, + expand_specific(Expand, true). + +%% cond_f_predicate_weight(+Pred, +ExpandState, +Score, -Weight) +% +% Weight is the weight of Pred if ExpandState and Score allow. + +cond_f_predicate_weight(P, Expand, _Score, 1) :- + rdf_equal(P, rdf:value), !, + expand_rdf_value(Expand, true). + +eq_predicate(P) :- + rdfs_subproperty_of(P, owl:sameAs). +eq_predicate(P) :- + rdfs_subproperty_of(P, skos:exactMatch). + + + +literal_edge(literal(L), _Score, i(S,P,1)) :- + rdf(S, P, literal(L)). diff --git a/lib/cluster_search/rdf_full_search.pl b/lib/cluster_search/rdf_full_search.pl new file mode 100644 index 0000000..ac3f161 --- /dev/null +++ b/lib/cluster_search/rdf_full_search.pl @@ -0,0 +1,151 @@ +/* This file is part of ClioPatria. + + Author: + HTTP: http://e-culture.multimedian.nl/ + GITWEB: http://gollem.science.uva.nl/git/ClioPatria.git + GIT: git://gollem.science.uva.nl/home/git/ClioPatria.git + GIT: http://gollem.science.uva.nl/home/git/ClioPatria.git + Copyright: 2007, E-Culture/MultimediaN + + ClioPatria is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + ClioPatria is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with ClioPatria. If not, see <http://www.gnu.org/licenses/>. +*/ + +:- module(rdf_full_search, + [ rdf_full_search/4 % +KeyWord, +TargetCond, -State, +Options + ]). + +:- use_module(rdf_search). +:- use_module(rdf_graph). +:- use_module(kwd_search). +:- use_module(library(record)). +:- use_module(library(assoc)). +:- use_module(library(url)). +:- use_module(library(pairs)). +:- use_module(library(lists)). +:- use_module(library(debug)). +:- use_module(library(option)). +:- use_module(library(semweb/rdf_db)). +:- use_module(library(semweb/rdfs)). +:- use_module(filter). + +/** <module> Full search on RDF graph + +@author Michiel Hildebrand, based on util(rdf_search.pl) +*/ + +:- meta_predicate + rdf_full_search(+, 1, -, +). + + +%% rdf_full_search(+Keyword, :TargetCond, -State, +Options) +% +% Initiate a full graph search by traversing resources in all +% possible ways. +% +% Options: see rdf_search/4 + +rdf_full_search(Keyword, TargetCond, State, Options) :- + Expand = rdf_full_search:edge, + setting(search:steps, DefSteps), + option(steps(Steps0), Options, DefSteps), + ( Steps0 == 0 + -> Steps = -1 + ; Steps = Steps0 + ), + rdf_keyword_search(Keyword, TargetCond, State, + [expand_node(Expand)|Options]), + steps(0, Steps, State). + +steps(Steps, Steps, _) :- !. +steps(I, Steps, Graph) :- + I2 is I + 1, + ( rdf_extend_search(Graph) + -> ( debugging(rdf_search) + -> debug(rdf_search, 'After cycle ~D', [I2]), + forall(debug_property(P), + ( rdf_search_property(Graph, P), + debug(rdf_search, '\t~p', [P]))) + ; true + ), + steps(I2, Steps, Graph) + ; debug(rdf_search, 'Agenda is empty after ~D steps~n', [I]) + ). + +debug_property(target_count(_)). +debug_property(graph_size(_)). + + +%% edge(+Node, -Link) is nondet. +% +% Generate links from Node. + +edge(O, i(S,P,W)) :- + i_edge(O, S, P, W), + debug(myedge, 'Expanding ~2f ~p ~p ~p~n', [W, O, P, S]), + W > 0.0001. +edge(O, f(S,P,W)) :- + f_edge(O, S, P, W), + debug(myedge, 'Expanding ~2f ~p ~p ~p~n', [W, O, P, S]), + W > 0.0001. + + +i_edge(O, S, P, W) :- + setof(S, rdf(S, P, O), Ss), + ( predicate_weight(P, W) + -> member(S, Ss) + ; length(Ss, Len), + member(S, Ss), + subject_weight(S, Len, W) + ). + +f_edge(O, S, P, W) :- + setof(S, f_rdf(O, P, S), Ss), + ( predicate_weight(P, W) + -> member(S, Ss) + ; length(Ss, Len), + member(S, Ss), + subject_weight(S, Len, W) + ). + +f_rdf(O, P, S) :- + rdf(O,P,S), + atom(S), + \+ rdf_equal(P,skos:inScheme). + +%% predicate_weight(+Predicate, -Weight) is semidet. +% +% Weight based on the meaning of Predicate. This predicate deals +% with RDF predicates that have a well defined meaning. +% +% Additional weights (or overwrites) can be defined in +% cliopatria:predicate_weight/2, +% +% Note that rdfs:comment is not searched as it is supposed to +% be comment about the graph, and not part of the graph itself. + +%predicate_weight(P, 1) :- + %rdfs_subproperty_of(P, rdfs:label), !. +predicate_weight(P, 1) :- + rdfs_subproperty_of(P, rdf:value), !. +predicate_weight(P, 1) :- + rdf_equal(P, owl:sameAs), !. +predicate_weight(P, 1) :- + rdf_equal(P, skos:exactMatch), !. +predicate_weight(P, 0) :- + rdfs_subproperty_of(P, rdfs:comment), !. + +subject_weight(S, _, 1) :- + rdf_is_bnode(S), !. +subject_weight(_, Count, W) :- + W is 1/max(3, Count). diff --git a/lib/cluster_search/rdf_graph.pl b/lib/cluster_search/rdf_graph.pl new file mode 100644 index 0000000..3b8c756 --- /dev/null +++ b/lib/cluster_search/rdf_graph.pl @@ -0,0 +1,549 @@ +/* This file is part of ClioPatria. + + Author: + HTTP: http://e-culture.multimedian.nl/ + GITWEB: http://gollem.science.uva.nl/git/ClioPatria.git + GIT: git://gollem.science.uva.nl/home/git/ClioPatria.git + GIT: http://gollem.science.uva.nl/home/git/ClioPatria.git + Copyright: 2007, E-Culture/MultimediaN + + ClioPatria is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + ClioPatria is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with ClioPatria. If not, see <http://www.gnu.org/licenses/>. +*/ + +:- module(rdf_graph, + [ new_search_graph/1, % -Graph + search_graph_add_node/3, % !Graph, +Node, +Fields + search_graph_add_edge/4, % !Graph, +From, +Pred, +To + search_graph_add_edge/5, % !Graph, +From, +Pred, +To, +Weight + search_graph_set_node_type/3, % !Graph, +Node, +Type + + search_graph_prune/2, % !Graph, +Nodes + search_graph_prune/3, % !Graph, +Node, -New + search_graph_drains/2, % +Graph, -Drains + search_graph_sources/2, % +Graph, -Sources + + % Agenda + search_graph_next_agenda/3, % !Graph, -Node, -Score + search_graph_agenda/2, % +Graph, -Pairs + + % Queries + search_graph_size/2, % +Graph, -NodeCount + search_graph_node_score/3, % +Graph, +Node, -Score + search_graph_node_type/3, % +Graph, +Node, -Type + search_graph_nodes_score_list/2, % @Graph, -list(Node-Score) + + % RDF Query graph + search_graph_rdf_graph/2, % +Graph, -Triples + search_graph_rdf/4, % +Graph, ?S, ?P, ?O + search_graph_rdfs/4 % +Graph, ?S, ?P, ?O + ]). +:- use_module(library(record)). +:- use_module(library(rbtrees)). +:- use_module(library(error)). +:- use_module(library(debug)). +:- use_module(library(lists)). +:- use_module(library(option)). +:- use_module(library('semweb/rdf_db')). + +/** <module> Representing RDF search paths + +---++ Introduction + +This module represents a search-graph through an RDF graph starting at +one or more locations in the graph. This search-graph is essentially a +subgraph of the original graph. As it is intended as a dynamically +evolving structure during reasoning, it is represented as a pure Prolog +term and therefore subject to backtracking and normal Prolog memory +management. + +---++ Representation + +The graph is represented as an RB tree from URI to a record + + node_data(Next, Previous, Score, Type) + +where Next and Previous are lists of p([fi](Predicate), Target). If +f(P), the link is rdf(Prev, P, Next), if i(P), the link is rdf(Next, P, +Prev). The score of the node indicates semantic distance, ranging from 0 +(very far way) to 1 (very close). Finally, Type classifies the node as +one of =start=, =node= or =target=. + +The graphs as a whole contains + + $ nodes : + An rb_tree mapping node-id to a node_data term as described above. + $ agenda : + An rb_tree mapping score to a list of nodes with that score. + $ size : + Nunber of nodes (for statistical reasons). + +---++ Issues + +---+++ Shall we deal internally with owl:sameAs? + +---+++ How to deal with owl property types? + + $ owl:inverseOf : + $ owl:TransitiveProperty : + $ owl:SymmetricProperty : + +@tbd No weight loss passing a blank node? +*/ + +:- record graph(nodes, agenda, size=0). +:- record node_data(next=[], previous=[], score=1, type=node). + + +%% new_search_graph(-Graph) +% +% Create a new search graph. The initial graph is empty. + +new_search_graph(Graph) :- + rb_empty(Nodes), + rb_empty(Agenda), + make_graph([nodes(Nodes), agenda(Agenda)], Graph). + + +%% search_graph_add_node(!Graph, +Node, +Fields) +% +% Add a (start-)node to the graph + +search_graph_add_node(Graph, Node, Fields) :- + graph_nodes(Graph, Assoc0), + make_node_data(Fields, NodeData), + rb_insert(Assoc0, Node, NodeData, Assoc1), + set_nodes_of_graph(Assoc1, Graph), + graph_size(Graph, Size0), + Size is Size0 + 1, + set_size_of_graph(Size, Graph), + node_data_score(NodeData, Score0), + add_new_node_to_agenda(Graph, Node, Score0, Score), + ( Score == Score0 + -> true + ; set_score_of_node_data(Score, NodeData) + ). + + +%% search_graph_add_edge(!Graph, +From, +Pred, +To) is det. +%% search_graph_add_edge(!Graph, +From, +Pred, +To, +Options) is det. +% +% Add a link {From->Pred->To} to the graph. Options include +% +% * weight(+Weight) +% Weight to use for the predicate link instead of the default +% +% * inverse(true) +% <From, Pred, To> is formed from rdf(To,Pred,From) + +search_graph_add_edge(Graph, From, Pred, To) :- + search_graph_add_edge(Graph, From, Pred, To, []). +search_graph_add_edge(Graph, From, Pred, To, Options) :- + graph_nodes(Graph, Assoc0), + rb_lookup(From, FromData, Assoc0), + ( option(inverse(true), Options) + -> PTerm = i(Pred) + ; PTerm = f(Pred) + ), + add_next_node_data(FromData, p(PTerm, To)), + node_data_score(FromData, Score0), + ( option(weight(Weight), Options) + -> Score is Score0 * Weight + ; update_score(Graph, From, Pred, To, Score0, Score) + ), + ( rb_lookup(To, ToData, Assoc0) + -> add_previous_node_data(ToData, p(PTerm, From)), + node_data_score(ToData, Score1), + join_score(Score1, Score, NewScore), + reschedule_node(Graph, To, Score1, NewScore, FinalScore), + set_score_of_node_data(FinalScore, ToData) + ; search_graph_add_node(Graph, To, + [ score(Score), + previous([p(PTerm, From)]) + ]) + ). + +add_next_node_data(Data, Next) :- + node_data_next(Data, Next0), + set_next_of_node_data([Next|Next0], Data). + +add_previous_node_data(Data, Previous) :- + node_data_previous(Data, Previous0), + set_previous_of_node_data([Previous|Previous0], Data). + +%% join_score(+Score1, +Score2, -Score) +% +% Join two scores comming from independent paths. Scores are +% considered probabilities. + +join_score(S1, S2, S) :- + S is 1-((1-S1)*(1-S2)). + +%% update_score(+Graph, +From, +Pred, +To, +Score0, -Score) +% +% Given that From has score ScoreO, compute the score for To based +% on Pred. First prototype used a predefined value per predicate +% in the range 0..1. Alternatively we can use statistical +% measures. Some porposals: +% +% * How many subjects of the same type have this property with +% this value? +% * How many subjects have this object (over this relation)? +% +% @param Score Float in the range 0.0..1.0 + +update_score(_Graph, From, Pred, To, Score0, Score) :- + by_in_links(From, Pred, To, Factor), + Score is Score0 * Factor. + +by_in_links(_From, Pred, To, Factor) :- + rdf_estimate_complexity(_, Pred, To, Complexity), + Factor is 1/Complexity. + +%% search_graph_set_node_type(+Graph, +Node, +Type) is det. +% +% Set the type for Node in Graph to Type. +% +% @error existence_error(node, Node) + +search_graph_set_node_type(Graph, Node, Type) :- + graph_nodes(Graph, Assoc), + ( rb_lookup(Node, Data, Assoc) + -> set_type_of_node_data(Type, Data) + ; existence_error(node, Node) + ). + +%% search_graph_node_type(+Graph, +Node, -Type) is det. +% +% Type is the classification for Node in Graph. +% +% @tbd Generalise modes? + +search_graph_node_type(Graph, Node, Type) :- + graph_nodes(Graph, Assoc), + ( rb_lookup(Node, Data, Assoc) + -> node_data_type(Data, Type) + ; existence_error(node, Node) + ). + +%% search_graph_node_score(+Graph, +Node, -Weight) is det. +% +% Weight is the current score for Node in Graph. + +search_graph_node_score(Graph, Node, Weight) :- + graph_nodes(Graph, Assoc), + rb_lookup(Node, Data, Assoc), + node_data_score(Data, Weight). + +%% search_graph_size(+Graph, -Size) is det. +% +% Size of the graph in nodes. + +search_graph_size(Graph, Size) :- + graph_size(Graph, Size). + + + /******************************* + * PRUNING * + *******************************/ + +%% search_graph_prune(!Graph, +Nodes) is det. +%% search_graph_prune(!Graph, +Nodes, -NewLeaves) is det. +% +% Delete non-target leave nodes from Graph. + +search_graph_prune(Graph, Prune) :- + search_graph_prune(Graph, Prune, _). + +search_graph_prune(Graph, Prune, NewLeaves) :- + is_list(Prune), !, + find_ancestors(Prune, Graph, Ancestors), + group_pairs_by_key(Ancestors, Grouped), + delete_next_links(Grouped, Graph, NewLeaves). +search_graph_prune(Graph, Prune, NewLeaves) :- + search_graph_prune(Graph, [Prune], NewLeaves). + +%% find_ancestors(+Prune, +Graph, -Ancestors) +% +% @param Ancestors is a difference-list of terms From-p(Pred,To) + +find_ancestors(Prune, Graph, Ancestors) :- + graph_nodes(Graph, Nodes), + find_ancestors(Prune, Graph, Nodes, RestNodes, + 0, Deleted, Ancestors, []), + graph_size(Graph, Size0), + Size is Size0 - Deleted, + set_size_of_graph(Size, Graph), + set_nodes_of_graph(RestNodes, Graph). + +find_ancestors([], _, Nodes, Nodes, C, C, A, A). +find_ancestors([H|T], Graph, Nodes0, Nodes, C0, C, A, AT) :- + rb_lookup(H, Data, Nodes0), + ( node_data_type(Data, target) + -> find_ancestors(T, Graph, Nodes0, Nodes, C0, C, A, AT) + ; node_data_next(Data, Next), + Next \== [] + -> permission_error(Graph, prune, H) + ; node_data_previous(Data, Prev), + make_a_list(Prev, H, A, AT0), + rb_delete(Nodes0, H, Nodes1), + C1 is C0 + 1, + find_ancestors(T, Graph, Nodes1, Nodes, C1, C, AT0, AT) + ). + +make_a_list([], _, A, A). +make_a_list([p(Pred, From)|T0], To, [From-p(Pred,To)|T1], T) :- + make_a_list(T0, To, T1, T). + + +delete_next_links([], _, []). +delete_next_links([H-DelNext|T], Graph, NewLeaves) :- + graph_nodes(Graph, Nodes), + rb_lookup(H, Data, Nodes), + node_data_next(Data, Next0), + subtract_set(Next0, DelNext, Next), + set_next_of_node_data(Next, Data), + ( Next == [] + -> NewLeaves = [H|NewLeaves1], + delete_next_links(T, Graph, NewLeaves1) + ; delete_next_links(T, Graph, NewLeaves) + ). + +%% subtract_set(+Set, +Subtract, -Remaining) is det. +% +% Remaining are the elements from Set that are not in Subtract. +% None of the sets is ordered. We have to apply heuristics to find +% out the best approach. + +subtract_set(Set, [One], Remaining) :- !, + selectchk(One, Set, Remaining). +subtract_set(Set, Subtract, Remaining) :- + sort(Set, SSet), + sort(Subtract, SSubstract), + ord_subtract(SSet, SSubstract, Remaining). + + +%% search_graph_nodes_score_list(@Graph, -Scores:list(Node-Score)) is det. +% +% Scores is a list with Node-Score pairs, with all nodes in +% Graph. + +search_graph_nodes_score_list(Graph, Scores) :- + graph_nodes(Graph, Assoc), + rb_visit(Assoc, NodeList), + scores(NodeList, Scores). + +scores([], []). +scores([Node-Data|T0], [Node-Score|T]) :- + node_data_score(Data, Score), !, + scores(T0, T). + + +%% search_graph_drains(@Graph, -Nodes) +% +% List of all drains (end-of-path) + +search_graph_drains(Graph, Nodes) :- + graph_nodes(Graph, Assoc), + rb_visit(Assoc, NodeList), + drains(NodeList, Nodes). + +drains([], []). +drains([Node-Data|T0], [Node|T]) :- + node_data_next(Data, []), !, + drains(T0, T). +drains([_|T0], T) :- + drains(T0, T). + + +%% search_graph_sources(@Graph, -Nodes) +% +% List of all drains (start-of-path) + +search_graph_sources(Graph, Nodes) :- + graph_nodes(Graph, Assoc), + rb_visit(Assoc, NodeList), + sources(NodeList, Nodes). + +sources([], []). +sources([Node-Data|T0], [Node|T]) :- + node_data_previous(Data, []), !, + sources(T0, T). +sources([_|T0], T) :- + sources(T0, T). + + + /******************************* + * AGENDA * + *******************************/ + +%% add_new_node_to_agenda(!Graph, +Node, +Score, -FinalScore) is det. +% +% Add a new node to the agenda. We want a unique score for each +% node, so we can find the node from the score. Therefore, if +% there is already a node with this score, we use a slightly lower +% score, etc. +% +% @tbd: use rb_update/5. + +add_new_node_to_agenda(Graph, Node, Score, FinalScore) :- + must_be(between(0.0,1.0), Score), + graph_agenda(Graph, RBTree0), + into_agenda(RBTree0, Node, Score, RBTree, FinalScore), + set_agenda_of_graph(RBTree, Graph). + +into_agenda(RBTree0, Node, Score, RBTree, FinalScore) :- + rb_insert_new(RBTree0, Score, Node, RBTree), !, + FinalScore = Score. +into_agenda(RBTree0, Node, Score, RBTree, FinalScore) :- + rb_previous(RBTree0, Score, PrevScore, _), !, + ( Score - PrevScore =< 0.001 + -> between(1, infinite, Step), + FinalScore is PrevScore - 0.000000001*Step + ; between(1, infinite, Step), + FinalScore is Score - 0.001*Step + ), + rb_insert_new(RBTree0, FinalScore, Node, RBTree), !. +into_agenda(RBTree0, Node, Score, RBTree, FinalScore) :- + FinalScore is Score - 0.001, + rb_insert_new(RBTree0, FinalScore, Node, RBTree). + +%% reschedule_node(!Graph, +Node, +OldScore, -NewScore) +% +% Change the score of Node in the agenda of Graph. Leaves the +% agenda untouched if Node is not in it as this implies the node +% is already expanded. + +reschedule_node(Graph, Node, OldScore, NewScore, FinalScore) :- + graph_agenda(Graph, RBTree0), + ( rb_delete(OldScore, Node, RBTree0) + -> into_agenda(RBTree0, Node, NewScore, RBTree, FinalScore), + set_agenda_of_graph(RBTree, Graph) + ; FinalScore = NewScore % already expanded + ). + +%% search_graph_next_agenda(!Graph, -Node, -Score) is semidet. +% +% Fetches the best node from the agenda and deletes it + +search_graph_next_agenda(Graph, Node, Score) :- + graph_agenda(Graph, RBTree0), + rb_del_max(RBTree0, Score, Node, RBTree), + set_agenda_of_graph(RBTree, Graph). + + +%% search_graph_agenda(+Graph, -Pairs) +% +% Get agenda from the graph, showing not-expanded nodes as pairs +% Score-Node. E.g. +% +% == +% Agenda = [1-a, 0.5-b, 0.25-c] +% == + +search_graph_agenda(Graph, Pairs) :- + graph_agenda(Graph, RBTree), + rb_visit(RBTree, Pairs0), + reverse(Pairs0, Pairs). + + + /******************************* + * RDF * + *******************************/ + +%% search_graph_rdf_graph(+Graph, -RDF:list(rdf(S,P,O))) is det. +% +% Create a list of rdf(S,P,O) triples from Graph. + +search_graph_rdf_graph(Graph, RDF) :- + graph_nodes(Graph, Nodes), + rb_visit(Nodes, Pairs), + phrase(graph_to_rdf(Pairs), RDF). + +graph_to_rdf([]) --> + []. +graph_to_rdf([R-Data|T]) --> + { node_data_next(Data, Next) + }, + node_to_rdf(Next, R), + graph_to_rdf(T). + +node_to_rdf([], _) --> + []. +node_to_rdf([p(P, O)|T], S) --> + link_to_rdf(P, S, O), + node_to_rdf(T, S). + +link_to_rdf(f(P), S, O) --> + [ rdf(S, P, O) ]. +link_to_rdf(i(P), S, O) --> + [ rdf(O, P, S) ]. + +%% search_graph_rdf(+Graph, ?S, ?P, ?O) is nondet. +% +% rdf(S,P,O) is an RDF statement in Graph. Provides indexed access +% if either S or O are given. + +:- rdf_meta + search_graph_rdf(+, r, r, o), + search_graph_rdfs(+, r, r, o). + +search_graph_rdf(Graph, S, P, O) :- + ground(S), ground(O), !, + graph_nodes(Graph, Nodes), + rb_lookup(S, SData, Nodes), + rb_lookup(O, OData, Nodes), + ( node_data_next(SData, SN), + dmember(P, p(f(P),O), SN) + ; node_data_next(OData, PN), + dmember(P, p(i(P),S), PN) + ). +search_graph_rdf(Graph, S, P, O) :- + ( search_graph_rdf_(Graph, S, f(P), O) + ; search_graph_rdf_(Graph, O, i(P), S) + ). + +search_graph_rdf_(Graph, S, P, O) :- + graph_nodes(Graph, Nodes), + ( ground(S) + -> rb_lookup(S, SData, Nodes), + node_data_next(SData, List), + member(p(P, O), List) + ; ground(O) + -> rb_lookup(O, OData, Nodes), + node_data_previous(OData, List), + member(p(P, S), List) + ; rb_in(S, SData, Nodes), + node_data_next(SData, List), + member(p(P, O), List) + ). + +dmember(Ground, Element, List) :- + ground(Ground), !, + memberchk(Element, List). +dmember(_Ground, Element, List) :- + member(Element, List), !. + +%% search_graph_rdfs(+Graph, ?S, ?P, ?O) is nondet. +% +% rdf(S,P0,O) is an RDF statement in Graph and P0 is a +% subPropertyOf P. Provides indexed access if either S or O are +% given. +% +% @see This is the same as rdf_has/3 from library rdf_db.pl + +search_graph_rdfs(Graph, S, P, O) :- + search_graph_rdf(Graph, S, P0, O), + rdf_reachable(P0, rdfs:subPropertyOf, P). + + diff --git a/lib/cluster_search/rdf_hierarchy_search.pl b/lib/cluster_search/rdf_hierarchy_search.pl new file mode 100644 index 0000000..b55bd70 --- /dev/null +++ b/lib/cluster_search/rdf_hierarchy_search.pl @@ -0,0 +1,104 @@ +/* This file is part of ClioPatria. + + Author: + HTTP: http://e-culture.multimedian.nl/ + GITWEB: http://gollem.science.uva.nl/git/ClioPatria.git + GIT: git://gollem.science.uva.nl/home/git/ClioPatria.git + GIT: http://gollem.science.uva.nl/home/git/ClioPatria.git + Copyright: 2007, E-Culture/MultimediaN + + ClioPatria is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + ClioPatria is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with ClioPatria. If not, see <http://www.gnu.org/licenses/>. +*/ + +:- module(rdf_hierarchy_search, + [ rdf_hierarchy_search/4 % +KeyWord, +TargetCond, -State, +Options + ]). + +:- use_module(rdf_search). +:- use_module(rdf_graph). +:- use_module(kwd_search). +:- use_module(library(record)). +:- use_module(library(assoc)). +:- use_module(library(url)). +:- use_module(library(pairs)). +:- use_module(library(lists)). +:- use_module(library(debug)). +:- use_module(library(option)). +:- use_module(library('semweb/rdf_db')). +:- use_module(library('semweb/rdfs')). +:- use_module(filter). +:- use_module(iface_util). + +/** <module> Direct metadata and hierarchy search on RDF graph + +@author Michiel Hildebrand, on top of search facilities from rdf_search.pl +*/ + +:- meta_predicate + rdf_hierarchy_search(+, 1, -, +). + +%% rdf_hierarchy_search(+Keyword, :TargetCond, -State, +Options) +% +% Initiate a graph search by traversing only direct links +% and narrower relations. +% +% Options: see rdf_search/4 + +rdf_hierarchy_search(Keyword, TargetCond, State, Options) :- + Expand = rdf_hierarchy_search:edge(TargetCond), + rdf_keyword_search(Keyword, TargetCond, State, + [expand_node(Expand),target_expand(false)|Options]), + steps(State). + +steps(State) :- + rdf_extend_search(State), !, + steps(State). +steps(_). + +%% edge(+Cond, +Object, -Link) is nondet. +% +% Default predicate to generate edges. + +edge(Cond, O, i(S,P,W)) :- + edge_i(O, S, P), + predicate_weight(P, W), + hierarchy_search_edge(S, P, O, W, Cond). + + +hierarchy_search_edge(_, _P, literal(_), _, _) :- + %rdfs_subproperty_of(P, rdfs:label), + !. +hierarchy_search_edge(_, _, _, 1, _) :- !. +hierarchy_search_edge(S, _, _, _, Cond) :- + call(Cond, S). + + +edge_i(O, S, P) :- + rdf(S, P, O). +edge_i(O, S, P) :- + rdf(O, P0, S), + rdf_search:inverse_property(P, P0). + + +predicate_weight(P, 1) :- + rdfs_subproperty_of(P, rdfs:label), !. +predicate_weight(P, 1) :- + rdf_equal(P, owl:sameAs), !. +predicate_weight(P, 1) :- + rdfs_subproperty_of(P, skos:exactMatch), !. +predicate_weight(P, 1) :- + rdf_equal(P, rdf:value), !. +predicate_weight(P, 1) :- + rdfs_subproperty_of(P, iface:broader), !. +predicate_weight(_, 0.5). diff --git a/lib/cluster_search/rdf_search.pl b/lib/cluster_search/rdf_search.pl new file mode 100644 index 0000000..aa66583 --- /dev/null +++ b/lib/cluster_search/rdf_search.pl @@ -0,0 +1,445 @@ +/* This file is part of ClioPatria. + + Author: + HTTP: http://e-culture.multimedian.nl/ + GITWEB: http://gollem.science.uva.nl/git/ClioPatria.git + GIT: git://gollem.science.uva.nl/home/git/ClioPatria.git + GIT: http://gollem.science.uva.nl/home/git/ClioPatria.git + Copyright: 2007, E-Culture/MultimediaN + + ClioPatria is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + ClioPatria is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with ClioPatria. If not, see <http://www.gnu.org/licenses/>. +*/ + +:- module(rdf_search, + [ rdf_keyword_search/4, % +KeyWord, +TargetCond, -State, +Options + rdf_init_state/3, % +TargetCond, -State, +Options + rdf_start_search/2, % +Query, -State + rdf_extend_search/1, % !State + rdf_prune_search/1, % !State + rdf_prune_search/2, % !State, +Options + rdf_search_property/2 % +Graph, ?Property + ]). +:- use_module(rdf_graph). +:- use_module(kwd_search). +:- use_module(library(record)). +:- use_module(library(count)). +:- use_module(library(assoc)). +:- use_module(library(pairs)). +:- use_module(library(lists)). +:- use_module(library(debug)). +:- use_module(library(option)). +:- use_module(library('semweb/rdf_db')). +:- use_module(library('semweb/rdfs')). +:- use_module(filter). + +/** <module> Search RDF graph + +@tbd Proper location for find_literals/3 + +@author Jan Wielemaker +*/ + +:- meta_predicate + rdf_keyword_search(+, 1, -, +), + init_search_state(1, -, +). + +:- record + state(graph, % Current search graph + start, + targets, % Collected hits + target_count = 0, % # Targets found + target_condition, % Test resource as a target + threshold = 0.05, % Graph search threshold + literal_threshold = 0.05, % Literal matching threshold + literal_score = true, % Use literal score in graph search + edge_limit = 0, % limit edges per node + target_expand = true, % expand targets + expand_node). % Generate new edges + + +%% rdf_keyword_search(+Keyword, :TargetCond, -State, +Options) +% +% Initiate a search-graph from a keyword by adding all matching +% literals to the initial state of the graph. Options: +% +% * expand_node(:Goal) +% Called as Goal(+Object, -Link) to create new edges. +% +% * threshold(+Number) +% Nodes with a weight lower then Threshold are not further explore +% +% * literal_threshold(+Number) +% Literals with a minimum edit distance above the literal threshold +% are added to the search graph. +% +% * edge_limit(+Number) +% Per Node a maximum of edges are added to the graph (0 is unbound). +% +% * expand_target(+Boolean) +% When true target nodes are alse expanded + +rdf_keyword_search(Keyword, TargetCond, State, Options) :- + rdf_init_state(TargetCond, State, Options), + rdf_start_search(Keyword, State). + +%% rdf_init_state(:TargetCond, -State, +Options) +% +% Initiate a search-graph state. +% Options see rdf_keyword_search/4 + +rdf_init_state(TargetCond, State, Options) :- + new_search_graph(Graph), + empty_assoc(Start), + empty_assoc(Targets), + strip_module(TargetCond, M, TC), + make_state([ graph(Graph), + start(Start), + targets(Targets), + target_condition(M:TC) + ], State0), + meta_options(rdf_search:is_meta, Options, MetaOptions), + set_state_fields(MetaOptions, State0, State, _RestOptions). + + +rdf_start_search(R, State) :- + ( is_list(R) + -> Rs = R + ; is_resource(R) + -> Rs = [R] + ), !, + state_start(State, Start0), + state_graph(State, Graph), + add_resources(Rs, Start0, Graph, Start, Links), + set_start_of_state(Start, State), + add_hits(Links, State). +rdf_start_search(Search, State) :- + state_literal_threshold(State, Threshold), + kwd_search:find_literals(Search, Literals, [threshold(Threshold)]), + state_graph(State, Graph), + state_start(State, Start0), + ( state_literal_score(State, false) + -> add_literals_no_score(Literals, Start0, Graph, Start) + ; add_literals(Literals, Start0, Graph, Start) + ), + set_start_of_state(Start, State). + +is_meta(expand_node). + +is_resource(literal(_)) :- !. +is_resource(R) :- rdf(R,_,_),!. +is_resource(R) :- rdf(_,R,_),!. +is_resource(R) :- rdf(_,_,R),!. + + + +%% rdf_extend_search(!State) is semidet. +% +% Expand the currently best node from the agenda. Fails if the +% agenda is empty. If the next node has no followers, it silently +% continues to the next node on the agenda. + +rdf_extend_search(State) :- + state_graph(State, Graph), + state_expand_node(State, ExpandNode), + search_graph_next_agenda(Graph, Expand, Score), + state_threshold(State, Threshold), + state_edge_limit(State, EdgeLimit), + state_target_expand(State, TargetExpand), + ( TargetExpand == false, + search_graph_node_type(Graph, Expand, target) + -> debug(rdf_search, 'Stop expanding target ~p (score ~2f)', + [Expand, Score]) + ; Score >= Threshold + -> ( EdgeLimit > 0 + -> answer_set(L, edge(ExpandNode, Expand, Score, L), EdgeLimit, Links) + ; findall(L, edge(ExpandNode, Expand, Score, L), Links) + ), + length(Links, Len), + debug(rdf_search, 'Expanding ~p (score ~2f, ~D successors)', + [Expand, Score, Len]), + ( Links == [] + -> rdf_extend_search(State) + ; add_edges(Links, Graph, Expand, State), + add_hits(Links, State) + ) + ; debug(rdf_search, 'Stopped expanding at ~p (score ~2f)', + [Expand, Score]), + fail + ). + + +%% edge(:Expand, +Object, +Score, -Link) is nondet. +% +% Generate edges, expanding Object. Returned Link is one of +% +% * i(S,P,W) +% Represents rdf(S,P,Object), costing weight W. +% +% * f(S,P,W) +% Represents rdf(Object,P,S), costing weight W. +% +% In these results, W is the cost traveling over the link. It is +% a float between 0.0 (inifinite cost) and 1.0 (no cost). + +edge(Expand, From, _, Link) :- + var(Expand), !, + edge(From, Link). +edge(Expand, From, Score, Link) :- + call(Expand, From, Score, Link). +%edge(Expand, From, _, Link) :- + %call(Expand, From, Link). + +%% edge(+Object, -Link) is nondet. +% +% Default predicate to generate edges. +% +% @see ClioPatria's expansion is in graph_search:edge/2. + +edge(O, i(S,P,W)) :- + weighted_edge(O, S, P, W), + debug(myedge, 'Expanding ~2f ~p ~p ~p~n', [W, O, P, S]), + W > 0.0001. + +weighted_edge(O, S, P, W) :- + setof(S, edge_i(O, S, P), Ss), + ( predicate_weight(P, W) + -> member(S, Ss) + ; length(Ss, Len), + member(S, Ss), + subject_weight(S, Len, W) + ). + +edge_i(O, S, P) :- + rdf(S, P, O). +edge_i(O, S, P) :- + rdf(O, P0, S), + rdf_has(P0, owl:inverseOf, P). + +predicate_weight(P, 1) :- + rdfs_subproperty_of(P, rdfs:label). +predicate_weight(P, 1) :- + rdf_equal(P, owl:sameAs). + +subject_weight(S, _, 1) :- + rdf_is_bnode(S), !. +subject_weight(_, Count, W) :- + W is 1/max(3, Count). + +%% add_literals(+Lits:list(Score-Literal), +Graph) is det. +% +% Adds the starting points for the search. + +add_literals([], Assoc, _, Assoc). +add_literals([Score-Lit|T], Assoc0, Graph, Assoc) :- + put_assoc(literal(Lit), Assoc0, found, Assoc1), + search_graph_add_node(Graph, literal(Lit), + [ score(Score), + type(start) + ]), + add_literals(T, Assoc1, Graph, Assoc). + +add_literals_no_score([], Assoc, _, Assoc). +add_literals_no_score([_S-Lit|T], Assoc0, Graph, Assoc) :- + put_assoc(literal(Lit), Assoc0, found, Assoc1), + search_graph_add_node(Graph, literal(Lit), + [ score(1), + type(start) + ]), + add_literals_no_score(T, Assoc1, Graph, Assoc). + +add_resources([], Assoc, _, Assoc, []). +add_resources([R|Rs], Assoc0, Graph, Assoc, [f(R)|Ls]) :- + put_assoc(R, Assoc0, found, Assoc1), + search_graph_add_node(Graph, R, + [ score(1), + type(start) + ]), + add_resources(Rs, Assoc1, Graph, Assoc, Ls). + + +%% add_edges(+Links, +Graph, +O, +State) is det. +% +% Links is a list of i(S,P,W) or f(S,P,W). If the graph already +% contains an edge that is considered a reverse property of the +% new edge, we do not add the new edge. + +add_edges([], _, _, _). +add_edges([H|T], Graph, Expand, State) :- + add_edge(H, Expand, State), + add_edges(T, Graph, Expand, State). + +add_edge(Link, O, State) :- + state_graph(State, Graph), + options_from_link(Link, S, P, Options, IF), + ( inverse_property(P, IP), + ( IF == i % Adding inverse property + -> search_graph_rdf(Graph, O, IP, S) + ; search_graph_rdf(Graph, S, IP, O) + ), + debug(rdf_search(inverse), 'Found ~p ~p ~p', [S, IP, O]) + -> true + ; search_graph_add_edge(Graph, O, P, S, Options) + ). + +options_from_link(i(S,P,W), S, P, [ weight(W), inverse(true) ], i). +options_from_link(f(S,P,W), S, P, [ weight(W) ], f). + +%% inverse_property(+P1, ?P2) is nondet. +% +% True if P1 and P2 are each others inverses. + +inverse_property(P1, P2) :- + rdf_has(P1, owl:inverseOf, P2). +inverse_property(P1, P2) :- + rdf_has(P2, owl:inverseOf, P1). +inverse_property(P, P) :- + rdf(P, rdf:type, owl:'SymmetricProperty'). + + +%% add_hits(+Links, +State) is det. +% +% Hits that satisfy target_condition are added to the target list. + +add_hits([], _) :- !. +add_hits(Links, State) :- + state_target_condition(State, Cond), + findall(S, ( member(H,Links), + arg(1, H, S), + call(Cond, S) + ), Hits0), + sort(Hits0, Hits), + ( Hits = [] + -> true + ; state_targets(State, Assoc0), + state_graph(State, Graph), + state_target_count(State, C0), + add_hits_to_target(Hits, Assoc0, C0, Graph, Assoc, C), + set_target_count_of_state(C, State), + set_targets_of_state(Assoc, State) + ). + +add_hits_to_target([], Assoc, C, _, Assoc, C). +add_hits_to_target([Hit|Hits], Assoc0, C0, Graph, Assoc, C) :- + get_assoc(Hit, Assoc0, _), !, + add_hits_to_target(Hits, Assoc0, C0, Graph, Assoc, C). +add_hits_to_target([Hit|Hits], Assoc0, C0, Graph, Assoc, C) :- + C1 is C0 + 1, + put_assoc(Hit, Assoc0, found, Assoc1), % Other value? + ( search_graph_node_type(Graph, Hit, start) + -> true + ; search_graph_set_node_type(Graph, Hit, target) + ), + add_hits_to_target(Hits, Assoc1, C1, Graph, Assoc, C). + + +%% rdf_prune_search(!Graph) is det. +% +% Prune all dead-ends from the search graph. + +rdf_prune_search(State) :- + rdf_prune_search(State, []). +rdf_prune_search(State, Options) :- + state_targets(State, Assoc), + assoc_to_list(Assoc, Pairs), + pairs_keys_values(Pairs, TNodes, _), + state_graph(State, Graph), + search_graph_drains(Graph, Drains), + sort(TNodes, TNodeSet), + sort(Drains, DrainSet), + ord_subtract(DrainSet, TNodeSet, DeadEnds), + ( memberchk(recursive, Options) + -> full_prune(Graph, DeadEnds) + ; search_graph_prune(Graph, DeadEnds, _) + ). + +full_prune(_Graph, []) :- !. +full_prune(Graph, DeadEnds) :- + search_graph_prune(Graph, DeadEnds, NewLeaves), + full_prune(Graph, NewLeaves). + + +%% rdf_search_property(+Graph, ?Prop). +% +% Extract features from the search. Defined properties are: +% +% * target_count(-Count) +% Number of (unique) targets found. +% +% * targets(-Targets:list(Score-Target) +% List of targets found sorted by Score (highest score first) +% There are no duplicate targets in the list. +% +% * graph_size(-Count) +% Number of nodes in the search-graph +% +% * sources(-List) +% List of source-nodes (start of paths). +% +% * drains(-List) +% List if drain-nodes (end of paths). +% +% * rdf(-RDF:list(rdf(S,P,O))) +% Extract the current graph as a set of RDF triples. + +rdf_search_property(Graph, Prop) :- + search_property(Prop, Graph). + +search_property(graph(Graph), State) :- + state_graph(State, Graph). +search_property(state_start(Assoc), State) :- + state_start(State, Assoc). +search_property(start(Pairs), State) :- + state_start(State, Assoc), + state_graph(State, Graph), + assoc_to_list(Assoc, Nodes), + scored_nodes(Nodes, Graph, Scored), + keysort(Scored, Sorted), + reverse(Sorted, Pairs). +search_property(target_count(C), State) :- + state_target_count(State, C). +search_property(state_targets(Assoc), State) :- + state_targets(State, Assoc). +search_property(targets(L), State) :- + state_targets(State, Assoc), + state_graph(State, Graph), + assoc_to_list(Assoc, Pairs), + scored_nodes(Pairs, Graph, Scored), + keysort(Scored, Sorted), + reverse(Sorted, L). +search_property(graph_size(C), State) :- + state_graph(State, Graph), + search_graph_size(Graph, C). +search_property(agenda(Agenda), State) :- + state_graph(State, Graph), + search_graph_agenda(Graph, Agenda). +search_property(drains(Drains), State) :- + state_graph(State, Graph), + search_graph_drains(Graph, Drains). +search_property(sources(Sources), State) :- + state_graph(State, Graph), + search_graph_sources(Graph, Sources). +search_property(node_score_list(Scores), State) :- + state_graph(State, Graph), + search_graph_nodes_score_list(Graph, Scores). +search_property(rdf(Triples), State) :- + state_graph(State, Graph), + search_graph_rdf_graph(Graph, Triples). + + +scored_nodes([], _, []) :- !. +scored_nodes([H-_|T0], Graph, [S-H|T]) :- + search_graph_node_score(Graph, H, S),!, + scored_nodes(T0, Graph, T). +scored_nodes([_|T0], Graph, T) :- + scored_nodes(T0, Graph, T). diff --git a/lib/cluster_search/rdfs_plus_skos.pl b/lib/cluster_search/rdfs_plus_skos.pl new file mode 100644 index 0000000..96ff9f7 --- /dev/null +++ b/lib/cluster_search/rdfs_plus_skos.pl @@ -0,0 +1,574 @@ +/* This file is part of ClioPatria. + + Author: + HTTP: http://e-culture.multimedian.nl/ + GITWEB: http://gollem.science.uva.nl/git/ClioPatria.git + GIT: git://gollem.science.uva.nl/home/git/ClioPatria.git + GIT: http://gollem.science.uva.nl/home/git/ClioPatria.git + Copyright: 2007, E-Culture/MultimediaN + + ClioPatria is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + ClioPatria is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with ClioPatria. If not, see <http://www.gnu.org/licenses/>. +*/ + +:- module(rdfs_plus_skos, + [ rdfs_plus_skos/5, % +ExtMap, +QueryMap, ?S, ?P, ?O + rdfs_plus_skos/8, % +ExtMap, +QueryMap, ?S, ?P, ?O, -RealS, -RealP, -RealO + + rdfs_plus_skos_opt/4, % ?S, ?P, ?O, +Options + rdfs_plus_skos_opt/7, % ?S, ?P, ?O, -RealS, -RealP, -RealO, +Options + + representative/3, % +ExtMap, +R, -Representative + resource_ext_map/2, % +Options, -Map + query_ext_map/2 % +Options, -Map + ]). + +:- use_module(library(assoc)). +:- use_module(library(error)). +:- use_module(library(debug)). +:- use_module(library('semweb/rdf_db')). +:- use_module(library('semweb/rdfs')). + + +:- rdf_meta + rdfs_plus_skos(+,+,r,r,o), + rdfs_plus_skos(+,+,r,r,o,-,-,-), + rdfs_plus_skos_opt(r,r,o,+), + rdfs_plus_skos_opt(r,r,o,-,-,-,+), + + same(r,r), + same(r,r,r), + skos_broader(r,r), + same_skos_broader(r,r,r), + + inverse_predicate(r,r). + + +rdf_optimise:rdf_db_goal(rdfs_plus_skos(_,_,S,P,O), S,P,O). +rdf_optimise:subj_branch_factor(rdfs_plus_skos(_,_,_,_,_), X, rdfs_subject_branch_factor(X)). +rdf_optimise:obj_branch_factor(rdfs_plus_skos(_,_,_,_,_), X, rdfs_object_branch_factor(X)). + +rdf_optimise:rdf_db_goal(rdfs_individual_of(S,C), S,'http://www.w3.org/1999/02/22-rdf-syntax-ns#type',C). +rdf_optimise:subj_branch_factor(rdfs_individual_of(_,_), X, rdfs_subject_branch_factor(X)). +rdf_optimise:obj_branch_factor(rdfs_individual_of(_,_), X, rdfs_object_branch_factor(X)). + + +%% rdfs_plus_skos_opt(?S, ?P, ?O, +Options). +%% rdfs_plus_skos_opt(?S, ?P, ?O, ?RealS, ?RealP, ?RealO, +Options). +% +% Same as rdf_has/4, but with optional reasoning. +% +% Options +% * owl_sameas +% * owl_inverse +% * owl_transitive +% * owl_symmetric +% * skos_exact +% * skos_broader +% * rdf_value + +rdfs_plus_skos_opt(S, P, O, Options) :- + rdfs_plus_skos_opt(S, P, O, _,_,_, Options). + +rdfs_plus_skos_opt(S, P, O, RS, RP, RO, Options) :- + instantiated(S, O, IMap), + resource_ext_map(Options, RMap), + query_ext_map(Options, QMap), + rdfs_plus_skos_(IMap, RMap, QMap, S,P,O, RS,RP,RO). + +%% rdfs_plus_skos(+ExtMap, +QueryMap, ?S, ?P, ?O). +% % rdfs_plus_skos(+ExtMap,+QueryMap, ?S, ?P, ?O, -RealS,-RealP,-RealO). +% +% Do Goal and extend S,P,O to RealS,RealP,RealO based on resource +% and property extension predicates. + +rdfs_plus_skos(RMap, QMap, S, P, O) :- + instantiated(S, O, IMap), + rdfs_plus_skos_(IMap, RMap, QMap, S, P, O, _, _, _). +rdfs_plus_skos(RMap, QMap, S, P, O, RealS, RealP, RealO) :- + instantiated(S, O, IMap), + rdfs_plus_skos_(IMap, RMap, QMap, S, P, O, RealS, RealP, RealO). + +rdfs_plus_skos_(0b00, _RMap, QMap, S,P,O, S,RP,O) :- + rdf_has_ext(QMap, S, P, O, RP). +rdfs_plus_skos_(0b10, RMap, QMap, S,P,O, RS,RP,O) :- + resource_ext(RMap, S, RS), + rdf_has_ext(QMap, RS, P, O, RP). +rdfs_plus_skos_(0b01, RMap, QMap, S,P,O, S,RP,RO) :- + resource_ext(RMap, O, RO), + rdf_has_ext(QMap, S, P, RO, RP). +rdfs_plus_skos_(0b11, RMap, QMap, S,P,O, RS,RP,RO) :- + resource_ext(RMap, S, RS), + resource_ext(RMap, O, RO), + rdf_has_ext(QMap, RS, P, RO, RP). + + +%% resource_ext(+Map, +R0, -R) +% +% True if R0 is R or derivable as defined by Map. + +resource_ext(0b000, R, R). + +resource_ext(0b100, R0, R) :- + same(owl:sameAs, R0, R). +resource_ext(0b010, R0, R) :- + same(skos:exactMatch, R0, R). +resource_ext(0b001, R0, R) :- + skos_broader(R0, R). + +resource_ext(0b110, R0, R) :- + rdf_global_term([owl:sameAs,skos:exactMatch], Ps), + same(Ps, R0, R). +resource_ext(0b101, R0, R) :- + same_skos_broader(owl:sameAs, R0, R). +resource_ext(0b011, R0, R) :- + same_skos_broader(skos:exactMatch, R0, R). + +resource_ext(0b111, R0, R) :- + rdf_equal(P1,owl:sameAs), + rdf_equal(P2,skos:exactMatch), + same_skos_broader([P1,P2], R0, R). + + +%% rdf_has_ext(+QMap, ?S, ?P, ?O, -RealP) +% +% As rdf_has/4, but with additional reasoning as indicated by +% QMap, see query_ext_map/2. + +rdf_has_ext(0b0000, S, P, O, RealP) :- + rdf_has(S, P, O, RealP). + +rdf_has_ext(0b1000, S, P, O, RealP) :- + rdfs_plus_inverse(S, P, O, RealP). +rdf_has_ext(0b0100, S, P, O, RealP) :- + rdfs_plus_symmetric(S, P, O, RealP). +rdf_has_ext(0b0010, S, P, O, RealP) :- + rdfs_plus_transitive(S, P, O, RealP). +rdf_has_ext(0b0001, S, P, O, RealP) :- + rdfs_plus_value(S, P, O, RealP). + +rdf_has_ext(0b1100, S, P, O, RealP) :- + rdfs_plus_inverse_symmetric(S, P, O, RealP). +rdf_has_ext(0b1010, S, P, O, RealP) :- + rdfs_plus_inverse_transitive(S, P, O, RealP). +rdf_has_ext(0b1001, S, P, O, RealP) :- + rdfs_plus_inverse_value(S, P, O, RealP). +rdf_has_ext(0b0110, S, P, O, RealP) :- + rdfs_plus_symmetric_transitive(S, P, O, RealP). +rdf_has_ext(0b0101, S, P, O, RealP) :- + rdfs_plus_symmetric_value(S, P, O, RealP). +rdf_has_ext(0b0011, S, P, O, RealP) :- + rdfs_plus_transitive_value(S, P, O, RealP). + +rdf_has_ext(0b1110, S, P, O, RealP) :- + rdfs_plus_inverse_symmetric_transitive(S, P, O, RealP). +rdf_has_ext(0b1011, S, P, O, RealP) :- + rdfs_plus_inverse_transitive_value(S, P, O, RealP). +rdf_has_ext(0b1101, S, P, O, RealP) :- + rdfs_plus_inverse_symmetric_value(S, P, O, RealP). +rdf_has_ext(0b0111, S, P, O, RealP) :- + rdfs_plus_symmetric_transitive_value(S, P, O, RealP). + +rdf_has_ext(0b1111, S, P, O, RealP) :- + rdfs_plus_inverse_symmetric_transitive_value(S, P, O, RealP). + + + + + + + + + + +%% rdfs_plus_value(?S, ?P, ?O, -SP) +% +% Same as rdf_has/4 but return the rdf:value of Object in case it +% exists. If Object is ground test both for existence of the +% normal triple as well as Object as an rdf:value. + +rdfs_plus_value(S, P, O, RP) :- + ground(O), !, + ( rdf_has(S, P, O, RP) + ; rdf_has(O0, rdf:value, O), + rdf_has(S, P, O0, RP) + ). +rdfs_plus_value(S, P, O, RP) :- + rdf_has(S, P, O0, RP), + ( rdf_has(O0, rdf:value, O) + -> true + ; O = O0 + ). + +%% rdfs_plus_inverse(?S, ?P, ?O, ?RealP) +% +% As rdf_has/4 but include inverse properties of P. + +rdfs_plus_inverse(S, P, O, RP) :- + rdf_has(S, P, O, RP). +rdfs_plus_inverse(S, P, O, RP) :- + ground(P), !, + inverse_predicate(P, IP), + rdf_has(O, IP, S, RP). + +%% rdfs_plus_transitive(?S, ?P, ?O, ?RealP) +% +% As rdf_has/4 but use rdf_reachable/3 for transitive properties. +% +% @TBD RealP is not correct. + +rdfs_plus_transitive(S, P, O, P) :- + ground(P), + rdfs_individual_of(P, owl:'TransitiveProperty'), !, + rdf_reachable(S, P, O). +rdfs_plus_transitive(S, P, O, RP) :- + rdf_has(O, P, S, RP). + +%% rdfs_plus_symmetric(?S, ?P, ?O, ?RealP) +% +% As rdf_has/4 but include the inverse for symmetric properties. + +rdfs_plus_symmetric(S, P, O, RP) :- + rdf_has(S, P, O, RP). +rdfs_plus_symmetric(S, P, O, RP) :- + ground(P), + rdfs_individual_of(P, owl:'SymmetricProperty'), !, + rdf_has(O, P, S, RP). + +%% rdfs_plus_inverse_symmetric(?S, ?P, ?O, ?RealP). +%% rdfs_plus_inverse_transitive(?S, ?P, ?O, ?RealP). +%% rdfs_plus_inverse_value(?S, ?P, ?O, ?RealP). +%% rdfs_plus_symmetric_transitive(?S, ?P, ?O, ?RealP). +%% rdfs_plus_symmetric_value(?S, ?P, ?O, ?RealP). +%% rdfs_plus_transitive_value(?S, ?P, ?O, ?RealP). +% +% As rdf_has/4 but include two types of reasoning. + +rdfs_plus_inverse_symmetric(S, P, O, RP) :- + rdf_has(S, P, O, RP). +rdfs_plus_inverse_symmetric(S, P, O, RP) :- + ground(P), + inverse_predicate(P, IP), + rdf_has(O, IP, S, RP). +rdfs_plus_inverse_symmetric(S, P, O, RP) :- + ground(P), + rdfs_individual_of(P, owl:'SymmetricProperty'), !, + rdf_has(O, P, S, RP). + +rdfs_plus_inverse_transitive(S, P, O, RP) :- + rdfs_plus_transitive(S, P, O, RP). +rdfs_plus_inverse_transitive(S, P, O, RP) :- + ground(P), !, + inverse_predicate(P, IP), + rdfs_plus_transitive(O, IP, S, RP). + +rdfs_plus_inverse_value(S, P, O, RP) :- + rdfs_plus_value(S, P, O, RP). +rdfs_plus_inverse_value(S, P, O, RP) :- + ground(P), !, + inverse_predicate(P, IP), + rdfs_plus_value(O, IP, S, RP). + +rdfs_plus_symmetric_transitive(S, P, O, RP) :- + rdfs_plus_transitive(S, P, O, RP). +rdfs_plus_symmetric_transitive(S, P, O, RP) :- + ground(P), + rdfs_individual_of(P, owl:'SymmetricProperty'), !, + rdfs_plus_transitive(O, P, S, RP). + +rdfs_plus_symmetric_value(S, P, O, RP) :- + rdfs_plus_value(S, P, O, RP). +rdfs_plus_symmetric_value(S, P, O, RP) :- + ground(P), + rdfs_individual_of(P, owl:'SymmetricProperty'), !, + rdfs_plus_value(O, P, S, RP). + +rdfs_plus_transitive_value(S, P, O, RP) :- + var(O), !, + rdfs_plus_transitive(S, P, O0, RP), + ( rdf_has(O0, rdf:value, O) + -> true + ; O = O0 + ). +rdfs_plus_transitive_value(S, P, O, RP) :- + ground(O), !, + ( rdfs_plus_transitive(S, P, O, RP) + ; rdf_has(O0, rdf:value, O), + rdfs_plus_transitive(S, P, O0, RP) + ). + +%% rdfs_plus_inverse_symmetric_transitive(?S, ?P, ?O, -RealP). +%% rdfs_plus_inverse_transitive_value(?S, ?P, ?O, -RealP). +%% rdfs_plus_symmetric_transitive_value(?S, ?P, ?O, -RealP). +%% rdfs_plus_inverse_symmetric_value(?S, ?P, ?O, -RealP). +% +% As rdf_has/4, but include 3 types of reasoning. + +rdfs_plus_inverse_symmetric_transitive(S, P, O, RP) :- + rdfs_plus_transitive(S, P, O, RP). +rdfs_plus_inverse_symmetric_transitive(S, P, O, RP) :- + ground(P), + inverse_predicate(P, IP), + rdfs_plus_transitive(O, IP, S, RP). +rdfs_plus_inverse_symmetric_transitive(S, P, O, RP) :- + ground(P), + rdfs_individual_of(P, owl:'SymmetricProperty'), !, + rdfs_plus_symmetric(O, P, S, RP). + +rdfs_plus_inverse_transitive_value(S, P, O, RP) :- + var(O), !, + rdfs_plus_inverse_transitive(S, P, O0, RP), + ( rdf_has(O0, rdf:value, O) + -> true + ; O = O0 + ). +rdfs_plus_inverse_transitive_value(S, P, O, RP) :- + ground(O), !, + ( rdfs_plus_inverse_transitive(S, P, O, RP) + ; rdf_has(O0, rdf:value, O), + rdfs_plus_inverse_transitive(S, P, O0, RP) + ). + +rdfs_plus_symmetric_transitive_value(S, P, O, RP) :- + var(O), !, + rdfs_plus_symmetric_transitive(S, P, O0, RP), + ( rdf_has(O0, rdf:value, O) + -> true + ; O = O0 + ). +rdfs_plus_symmetric_transitive_value(S, P, O, RP) :- + ground(O), !, + ( rdfs_plus_symmetric_transitive(S, P, O, RP) + ; rdf_has(O0, rdf:value, O), + rdfs_plus_symmetric_transitive(S, P, O0, RP) + ). + +rdfs_plus_inverse_symmetric_value(S, P, O, RP) :- + var(O), !, + rdfs_plus_inverse_symmetric(S, P, O0, RP), + ( rdf_has(O0, rdf:value, O) + -> true + ; O = O0 + ). +rdfs_plus_inverse_symmetric_value(S, P, O, RP) :- + ground(O), !, + ( rdfs_plus_inverse_symmetric(S, P, O, RP) + ; rdf_has(O0, rdf:value, O), + rdfs_plus_inverse_symmetric(S, P, O0, RP) + ). + + +rdfs_plus_inverse_symmetric_transitive_value(S, P, O, RP) :- + var(O), !, + rdfs_plus_inverse_symmetric_transitive(S, P, O0, RP), + ( rdf_has(O0, rdf:value, O) + -> true + ; O = O0 + ). +rdfs_plus_inverse_symmetric_transitive_value(S, P, O, RP) :- + ground(O), !, + ( rdfs_plus_inverse_symmetric_transitive(S, P, O, RP) + ; rdf_has(O0, rdf:value, O), + rdfs_plus_inverse_symmetric_transitive(S, P, O0, RP) + ). + + + +%% same_skos_broader(+Eq, +R0, -R). +% +% True if R is RO or reachable through Eq or skos:broader. + +same_skos_broader(Eq, R0, R) :- + same(Eq, R0, R1), + skos_broader(R1, R2), + same(Eq, R2, R). + +%% skos_broader(?R0, ?R) +% +% True if R = R0 or reachable true skos:broader or inversly +% through skos:narrower. + +skos_broader(R0, R) :- + rdf_reachable(R, skos:broader, R0). +skos_broader(R0, R) :- + rdf_reachable(R0, skos:narrower, R). + +%% same(+EqP, +R0, -R) is nondet. +%% same(+EqP, -R0, +P) is nondet. +% +% True if R is R0 or reachable through P. + +same(P, R0, R) :- + atom(R0), !, + empty_assoc(V0), + put_assoc(R0, V0, true, V), + same_(R0, R, P, V). +same(P, R0, R) :- + atom(R), !, + same(P, R, R0). +same(_P, R0, _R) :- + instantiation_error(R0). + +same_(R, R, _, _). +same_(R0, R, Ps, V) :- + ( is_list(Ps) + -> member(P, Ps) + ; P = Ps + ), + same_inv(R0,P,R1), + \+ get_assoc(R1, V, true), + put_assoc(R1, V, true, V2), + same_(R1, R, P, V2). + + +same_inv(R0,P,R1) :- + rdf_has(R0,P,R1). +same_inv(R0,P,R1) :- + rdf_has(R1,P,R0). + +%% inverse_predicate(+P1, +P2) is semidet. +% +% True if P1 and P2 are each others inverses. + +inverse_predicate(P1, P2) :- + rdf_has(P1, owl:inverseOf, P2), !. +inverse_predicate(P1, P2) :- + rdf_has(P2, owl:inverseOf, P1). + + +%% representative(+EMap, +R, -Representative) +% +% Representative is the representative URI for a set of resources +% equivalent with R. + +representative(EMap, R, Represent) :- + EMap > 1, !, + equivalence_set(EMap, R, Set0), + sort(Set0, [Represent|_]). +representative(_EMap, R, R). + +%% equivalence_set(+EMap, +R, -Set) +% +% Set contains R and all its equivalent resources. + +equivalence_set(EMap, R, Set) :- + ( EMap >= 6 + -> Ps = [owl:sameAs,skos:exactMatch] + ; EMap >= 4 + -> Ps = [owl:sameAs] + ; EMap >= 2 + -> Ps = [skos:exactmatch] + ), + findall(S, same(Ps, R, S), Set). + +%% instantiated(?S,?O,-Bitmap) +% +% Bitmap indicates instantiation of S and O. + +instantiated(S,O,I) :- + ( atom(S) -> I0 = 0b10 ; I0 = 0b00 ), + ( atom(O) -> I is I0\/0b01 ; I is I0\/0b00 ). + +%% resource_map(+Options, -Map) +% +% Bitmap indicates reseaning over resources. + +resource_ext_map(Opt, M) :- + ( memberchk(owl_sameas, Opt) -> M0 = 0b100 ; M0 = 0b000 ), + ( memberchk(skos_exact, Opt) -> M1 is M0\/0b010 ; M1 is M0\/0b000 ), + ( memberchk(skos_broader, Opt) -> M is M1\/0b001 ; M is M1\/0b000 ). + +%% query_ext_map(+Options, -Map) +% +% Bitmap indicates reasoning. + +query_ext_map(Opt, M) :- + ( memberchk(owl_inverse, Opt) -> M0 = 0b1000 ; M0 = 0b0000 ), + ( memberchk(owl_symmetric, Opt) -> M1 is M0\/0b0100 ; M1 is M0\/0b0000 ), + ( memberchk(owl_transitive, Opt) -> M2 is M1\/0b0010 ; M2 is M1\/0b0000 ), + ( memberchk(rdf_value, Opt) -> M is M2\/0b0001 ; M is M2\/0b0000 ). + + + + + +:- rdf_register_ns(t, 'http://test.com/'). + +%% assert_test_graph +% +% Create simple test graph. + +assert_test_graph :- + rdf_assert(t:work1, t:location, t:paris1), + rdf_assert(t:work1, owl:sameAs, t:work2), + rdf_assert(t:work3, owl:sameAs, t:work1), + rdf_assert(t:work3, t:location, t:paris3), + rdf_assert(t:paris3, skos:exactMatch, t:paris4), + rdf_assert(t:paris4, skos:broader, t:france4), + rdf_assert(t:paris1, skos:broader, t:france1), + rdf_assert(t:france1, skos:exactMatch, t:france2), + rdf_assert(t:location, owl:inverseOf, t:locationOf), + rdf_assert(t:work4, t:location, t:place), + rdf_assert(t:place, rdf:value, t:paris4). + + +%% rdfs_plus_skos_test(+Test) +% +% Tests rdfs_plus_skos reasoning. + +rdfs_plus_skos_test(1) :- + \+ rdfs_plus_skos_opt(t:work1, t:location, t:france1, []). +rdfs_plus_skos_test(2) :- + rdfs_plus_skos_opt(t:work1, t:location, t:france1, + [skos_broader]). +rdfs_plus_skos_test(3) :- + \+ rdfs_plus_skos_opt(t:work1, t:location, t:france2, + [skos_broader]). +rdfs_plus_skos_test(4) :- + rdfs_plus_skos_opt(t:work1, t:location, t:france2, + [skos_broader, skos_exact]). +rdfs_plus_skos_test(5) :- + \+ rdfs_plus_skos_opt(t:work2, t:location, t:paris3, + [skos_broader]). +rdfs_plus_skos_test(6) :- + rdfs_plus_skos_opt(t:work2, t:location, t:paris3, + [skos_broader, owl_sameas]). +rdfs_plus_skos_test(7) :- + \+ rdfs_plus_skos_opt(t:work2, t:location, t:france4, + [skos_broader, owl_sameas]). +rdfs_plus_skos_test(8) :- + rdfs_plus_skos_opt(t:work2, t:location, t:france4, + [skos_broader, owl_sameas, skos_exact]). +rdfs_plus_skos_test(9) :- + rdfs_plus_skos_opt(t:paris1, t:locationOf, t:work1, + [owl_inverse]). +rdfs_plus_skos_test(10) :- + rdfs_plus_skos_opt(t:france1, t:locationOf, t:work1, + [owl_inverse, skos_broader]). +rdfs_plus_skos_test(11) :- + rdfs_plus_skos_opt(t:france2, t:locationOf, t:work1, + [owl_inverse, skos_broader, skos_exact]). +rdfs_plus_skos_test(12) :- + rdfs_plus_skos_opt(t:france4, t:locationOf, t:work2, + [owl_inverse, skos_broader, owl_sameas, skos_exact]). +rdfs_plus_skos_test(13) :- + \+ rdfs_plus_skos_opt(t:work4, t:location, t:paris4, []). +rdfs_plus_skos_test(14) :- + rdfs_plus_skos_opt(t:work4, t:location, t:paris4, + [rdf_value]). +rdfs_plus_skos_test(15) :- + rdfs_plus_skos_opt(t:work4, t:location, t:france4, + [skos_broader, rdf_value]). + + + diff --git a/lib/cluster_search/tree.pl b/lib/cluster_search/tree.pl new file mode 100644 index 0000000..46ed16b --- /dev/null +++ b/lib/cluster_search/tree.pl @@ -0,0 +1,314 @@ +/* This file is part of ClioPatria. + + Author: + HTTP: http://e-culture.multimedian.nl/ + GITWEB: http://gollem.science.uva.nl/git/ClioPatria.git + GIT: git://gollem.science.uva.nl/home/git/ClioPatria.git + GIT: http://gollem.science.uva.nl/home/git/ClioPatria.git + Copyright: 2007, E-Culture/MultimediaN + + ClioPatria is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + ClioPatria is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with ClioPatria. If not, see <http://www.gnu.org/licenses/>. +*/ + +:- module(tree, + [ resource_tree/4, % +Resources, +Root, +Property, -Tree + create_tree/4, % +Elems, +Root, -Tree, +Options + format_tree/2, + tree_to_json/3, + key_rank/3, + sort_tree_nodes/3 % +Type, +Nodes, -Sorted + ]). + +:- use_module(library(semweb/rdf_db)). +:- use_module(iface_util). +:- use_module(rdfs_plus_skos). + + +%% create_tree(+Elems, +Root, -Tree:node(R,Atrr,Children), +Options) +% +% Tree contains all Elems that are reachable from Root. +% Options is a list of: +% +% parent(:Goal) +% goal that satisfies for hierarchical relation +% +% add_to_root(+Boolean) +% When true dangling resources are children of root +% +% close(+Boolean) +% Close the tree +% +% attribute(+Atom) +% Attribute assisgned to members of Elem +% +% add_label(+Boolean) +% Add label as attribute + +create_tree(Rs, Root, Tree, Options) :- !, + Tree = node(Root, _, _), + rdf_equal(iface:broader, Rel), + option(parent(Goal), Options, parent(Rel)), + option(add_to_root(AddToRoot), Options, false), + option(attribute(Attr), Options, [hit]), + option(add_label(AddLabel), Options, false), + make_tree(Rs, Goal, AddToRoot, AddLabel, Attr, Tree), + ( option(leaf_children(true), Options, true) + -> close_tree_and_children_check(Tree, Goal) + ; ( option(close(true), Options, true) + -> close_tree(Tree) + ; true + ) + ). + +%% resource_tree(+Resource, +Root, +Relation, -Tree:node(R,Atrr,Children)) +% +% Tree contains all Resources that are reachable from Root +% through Relation. + +resource_tree(Rs, Root, Rel, Tree) :- + Tree = node(Root, _, _), + make_tree(Rs, parent(Rel), false, false, [hit], Tree), + close_tree(Tree). + + +%% value_format(+Value, -URI, -Attrs) +% +% Guess format of value. + +value_format(R, AddLabel, R, Attrs) :- + atom(R), + ( AddLabel + -> iface_label(R, L), + Attrs = [label(L)] + ; Attrs = [] + ). +value_format(hit(R,_,L,_), _, R, [label(L)]). +value_format(Score-H, AddLabel, R, [score(Score)|A]) :- + number(Score), !, + value_format(H, AddLabel, R, A). +value_format(H-A, AddLabel, R, Attrs) :- + value_format(H, AddLabel, R, A0), + is_list(A), + append(A0, A, Attrs), + !. + +%% make_tree(+Matches, +ParentGoal, +AddToRoot, +%% +AddLabel, +Attributes, -Tree) +% +% Tree is of the form node(Resource, Attributes, Children) + +make_tree([], _, _, _, _, _). +make_tree([H|T], Goal, AddToRoot, AddLabel, A, Tree) :- + value_format(H, AddLabel, R, Attrs), + in_tree(R, Tree, Node), !, + append(A, Attrs, A0), + add_attrs(Node, A0), + make_tree(T, Goal, AddToRoot, AddLabel, A, Tree). +make_tree([H|T], Goal, AddToRoot, AddLabel, A, Tree) :- + value_format(H, AddLabel, R, Attrs), + call(Goal, R, Parent), + Parent \== R, % JW: Hack for direct cycles (AAT) + make_tree([Parent], Goal, AddToRoot, AddLabel, [], Tree), + in_tree(Parent, Tree, Node), !, + Node = node(_, _, Children), + memberchk(node(R, A1, _), Children), + append(A, Attrs, A0), + once(append(A0, _, A1)), + make_tree(T, Goal, AddToRoot, AddLabel, A, Tree). +make_tree([H|T], Goal, AddToRoot, AddLabel, A, Tree) :- + value_format(H, AddLabel, R, Attrs), + Tree = node(_,_,Children), + append(A, Attrs, A0), + ( AddToRoot == true + -> memberchk(node(R, A1, _), Children), + once(append(A0, _, A1)) + ; AddToRoot == top, + rdf_has(Scheme, skos:hasTopConcept, R) + -> iface_label(Scheme, Label), + memberchk(node(R, A1, _), Children), + once(append([conceptScheme(Label)|A0], _, A1)) + ; true + ), + make_tree(T, Goal, AddToRoot, AddLabel, A, Tree). + +parent(P, H, Parent) :- + rdfs_plus_skos_opt(H, P, Parent, [owl_inverse]), !. % use inverse_predicate/2 if too slow + +add_attrs(node(_, A0, _), A) :- + subset(A, A0), !. + +in_tree(_, Var, _) :- + var(Var), !, + fail. +in_tree(R, Node, Node) :- + Node = node(R, _, _), !. +in_tree(R, node(_, _, Children), Node) :- + in_children(R, Children, Node). + +in_children(_, Var, _) :- + var(Var), !, + fail. +in_children(R, [H|_], Node) :- + in_tree(R, H, Node), !. +in_children(R, [_|T], Node) :- + in_children(R, T, Node), !. + +close_tree(Var) :- + var(Var), !, + Var = []. +close_tree(node(_, A, Children)) :- !, + close_list(A), + close_tree(Children). +close_tree([H|T]) :- + close_tree(H), + close_tree(T). + +close_list([]) :- !. +close_list([_|T]) :- + close_list(T). + +%% close_tree_and_children_check(+Tree, -Goal) +% +% Check if leafs have children. + +close_tree_and_children_check(Var, _) :- + var(Var), !, + Var = []. +close_tree_and_children_check(node(R, A, Children), Goal) :- + close_list(A), + ( var(Children) + -> ( call(Goal, _, R) + -> Children = true + ; Children = [] + ) + ; close_tree_and_children_check(Children, Goal) + ). +close_tree_and_children_check([H|T], Goal) :- + close_tree_and_children_check(H, Goal), + close_tree_and_children_check(T, Goal). + + +%% format_tree(+Tree) +% +% pretty print a tree. + +format_tree(node(R,Attr,Children), N) :- + N1 is N+1, + ( memberchk(label(L), Attr) + -> true + ; iface_label(R, L) + ), + format_tree_indent(N), + format(' ~w', L), + format_tree_attributes(Attr), + format('~n'), + format_tree_children(Children, N1). + +format_tree_indent(0) :- !. +format_tree_indent(N) :- + N1 is N-1, + format('-'), + format_tree_indent(N1). + +format_tree_attributes([]). +format_tree_attributes([Term|T]) :- + ( Term =.. [label|_] + -> true + ; format(' ~w', [Term]) + ), + format_tree_attributes(T). + +format_tree_children([], _). +format_tree_children([Node|T], N) :- + format_tree(Node, N), + format_tree_children(T, N). + + +%% sort_tree_nodes(+Type, +NodeList, -Sorted) +% +% Sorted is NodeList sorted by Type. + +sort_tree_nodes(Type, Nodes, Sorted) :- + maplist(add_node_key(Type), Nodes, Pairs), + key_rank(Pairs, normal, Sorted). + +add_node_key(uri, Node, Key-Node) :- !, + Node = node(Key,_,_). +add_node_key(children, Node, Key-Node) :- !, + Node = node(_,_,Children), + ( (Children == false; Children == []) + -> Key = 1 + ; Key = 0 + ). +add_node_key(Type, Node, Key-Node) :- + Node = node(_,Attr,_), + Term =.. [Type,Key], + memberchk(Term, Attr), + !. +add_node_key(_, Node, zzz-Node). + +%% tree_to_json(+Tree:node(uri,nodeList), +DisplayProperties, -JSON) +% +% Tree to JSON term. + +tree_to_json(node(R,Attr,Children), Ps, json(Data)) :- + attr_params(Attr, Params), + iface_resource_properties(R, Ps, Vs), + append(Params, Vs, Data0), + ( is_list(Children) + -> Data1 = [children=Nodes|Data0], + nodes_to_json(Children, Ps, Nodes) + ; bool_to_json(Children,HasChildren) + -> Data1 = [hasChildren=HasChildren|Data0] + ; Data1 = Data0 + ), + Data = [uri=R|Data1]. + +nodes_to_json([], _, []) :- !. +nodes_to_json([Node|Nodes], Ps, [JNode|JSON]) :- !, + tree_to_json(Node, Ps, JNode), + nodes_to_json(Nodes, Ps, JSON). +nodes_to_json(Bool, _, JSON) :- + bool_to_json(Bool, JSON). + +bool_to_json(false, @false). +bool_to_json(true, @true). + +attr_params([], []). +attr_params([H|T], [P|Ps]) :- + attr_param(H, P), !, + attr_params(T, Ps). +attr_params([_|T], Ps) :- + attr_params(T, Ps). + +attr_param(Term, Key=Value) :- + Term =.. [Key,Value], + !. +attr_param(hit, hit=Bool) :- + bool_to_json(true, Bool). + + +%% key_rank(+List:key-value, +Type, -RankedList:value) +% +% Values from pair are sorted by keyed. +% +% * Type = forward or reverse + +key_rank(Pairs, reverse, Values) :- !, + keysort(Pairs, Pairs1), + pairs_values(Pairs1, Values0), + reverse(Values0, Values). +key_rank(Pairs, _, Values) :- + keysort(Pairs, Pairs1), + pairs_values(Pairs1, Values). diff --git a/lib/cluster_search/tree_abstract.pl b/lib/cluster_search/tree_abstract.pl new file mode 100644 index 0000000..fac5346 --- /dev/null +++ b/lib/cluster_search/tree_abstract.pl @@ -0,0 +1,383 @@ +:- module(tree_abstract, [ concepts_in_search_state/2, % +State, -Nodes + tree_abstract/3, % +Nodes, +Max, -Reduced + group_items_by_nodes/4, % +Nodes, +Items, +SearchGraph, -Pairs + target_to_nodes/4, % +Nodes, +Rel, +Graph, -Nodes + nodes_to_tree/2 % +Nodes, -Tree + ]). + + +:- use_module(library(semweb/rdf_db)). +:- use_module(library(semweb/rdfs)). +:- use_module(library(semweb/rdf_label)). +:- use_module(library(lists)). +:- use_module(library(debug)). +:- use_module(rdf_graph). +:- use_module(rdf_search). +:- use_module(graph_search). +:- use_module(owl_ultra_lite). + +%% group_items_by_nodes(+Nodes, +Items, +Graph, -Pairs:score-cluster) +% +% Clusters is a collection of a list each containing the Items +% directly related to a member of Nodes. + +group_items_by_nodes([], _, _, []). +group_items_by_nodes([node(root,_,_)|Ns], Items, Graph, Cs) :- !, + %debug(concept_group, '~w', Children), + group_items_by_nodes(Ns, Items, Graph, Cs). +group_items_by_nodes([node(R0,_P,_)|Ns], Items, Graph, [R-Results|Cs]) :- + ( R0 = [Random|_] + -> R = more(Random), + items_related_to_group(Items, R0, Graph, Results, Rest) + ; R = R0, + items_related_to_node(Items, R0, Graph, Results, Rest) + ), + length(Items, ICount), + length(Results, RCount), + debug(concept_group, '~w total ~w, results ~w', [R,ICount,RCount]), + Results \== [], !, + group_items_by_nodes(Ns, Rest, Graph, Cs). +group_items_by_nodes([_|Ns], Items, Graph, Cs) :- + group_items_by_nodes(Ns, Items, Graph, Cs). + + +items_related_to_node([], _, _, [], []). +items_related_to_node([Score-URI|Is], R, Graph, [Score-URI|Rs], Rest) :- + search_graph_rdf(Graph, URI, _, R), !, + items_related_to_node(Is, R, Graph, Rs, Rest). +items_related_to_node([Item|Is], R, Graph, Rs, [Item|Rest]) :- + items_related_to_node(Is, R, Graph, Rs, Rest). + +items_related_to_group([], _, _, [], []). +items_related_to_group([Score-URI|Is], Group, Graph, [Score-URI|Rs], Rest) :- + item_related_to_group(Group, URI, Graph), !, + items_related_to_group(Is, Group, Graph, Rs, Rest). +items_related_to_group([Item|Is], Group, Graph, Rs, [Item|Rest]) :- + items_related_to_group(Is, Group, Graph, Rs, Rest). + +item_related_to_group([R|_], S, Graph) :- + search_graph_rdf(Graph, S, _, R). +item_related_to_group([_|T], S, Graph) :- + item_related_to_group(T, S, Graph). + +% % concepts_in_search_state(+SearchState, -Pairs:score-[node(uri,parent,children)]) +% +% Pairs is a list of score-node pairs contained in the search +% state. + +concepts_in_search_state(State, Concepts) :- + rdf_search_property(State, node_score_list(Nodes)), + rdf_search_property(State, graph(Graph)), + findall(Score-Node, + ( member(Concept-Score, Nodes), + concept(Concept, Graph, Node) + ), + Concepts). + +concept(R, Graph, node(R,Parent,Children)) :- + rdfs_individual_of(R, skos:'Concept'), + rdf_equal(skos:broader, Rel), + %rdfs_individual_of(R, flor:'Sense'), !, + %rdf_equal(flor:isWNSubClass, Rel), + parent(R, Rel, Graph, Parent), + children(R, Rel, Graph, Children). + + +%% target_to_nodes(+Targets, +Rel, +Graph, -Nodes) +% +% Nodes is contains targets and for each target its parent and +% children. + +target_to_nodes([], _, _, []). +target_to_nodes([Score-R|Ts], Rel, Graph, [Score-Node|Ns]) :- + Node = node(R, Parent, Children), + parent(R, Rel, Graph, Parent), + children(R, Rel, Graph, Children), + target_to_nodes(Ts, Rel, Graph, Ns). + +parent(R, Rel, Graph, Parent) :- + search_graph_owl_inv(Graph, R, Rel, Parent), !. +parent(_, _, _, root). + +children(R, Rel, Graph, Children) :- + findall(C, + search_graph_owl_inv(Graph, C, Rel, R), + Children). + + +:- rdf_meta + search_graph_owl_inv(+, r, r, o). + +%% search_graph_owl_inv(+Graph, +S, +P, +O) +% +% Similar as search_graph, but with support for inverse +% properties. + +search_graph_owl_inv(Graph, S, P, O) :- + search_graph_rdf(Graph, S, P, O). +search_graph_owl_inv(Graph, S, P, O) :- + inverse_predicate(P, IP), + search_graph_rdf(Graph, O, IP, S), + \+ search_graph_rdf(Graph, S, P, O). + +%% tree_abstract(+TreeIn, +MaxNumberOfLeaves, -TreeOut) +% +% TreeOut is a reduced version of TreeIn containing only +% MaxNumberOfLeafs. + +tree_abstract(Nodes0, Max, TreeOut) :- + %nodes(TreeIn, Nodes0), + Nodes1 = [0.5-node(root,noparent,[])|Nodes0], + keysort(Nodes1, Nodes), + tree_reduce(Nodes, Max, TreeOut). + +%% tree_reduce(+Nodes, +Max, -Nodes) +% +% Reduce the number of nodes to Max, by +% * merging leafs +% * merge child with parent + +tree_reduce(Nodes, Max, Nodes) :- + length(Nodes, Count), + debug(tree_reduce, '~w nodes', [Count]), + Count =< Max, + !. +tree_reduce([Score-node(R,P,C)|Ns], Max, Reduced) :- + ( C = [] + -> debug(tree_reduce, 'leaf', []), + leaf_reduce(Ns, Score, R, P, Rest) + ; debug(tree_reduce, 'node', []), + node_reduce(Ns, Score, R, P, C, Rest) + ), !, + tree_reduce(Rest, Max, Reduced). +tree_reduce(Nodes, _, Nodes). + +leaf_reduce(Nodes, Score, R, P, Reduced) :- + leaf_merge(Nodes, Score, R, P, Bag, Rest), !, + keysort([Bag|Rest], Reduced). +leaf_reduce(Nodes, _Score, R, P, Reduced) :- + leaf_collapse(Nodes, R, P, Reduced). + +node_reduce(Nodes, Score, R, root, Cs, Reduced) :- + Score1 is Score*2, + keysort([Score1-node(R,root,Cs)|Nodes], Reduced). +node_reduce(Nodes, _Score, R, P, _Cs, Reduced) :- + merge_nodes(Nodes, P, R, Reduced). + %Score1 is Score*2, + %keysort([Score1-node(R,P,[])|Nodes], Reduced). + + +%% leaf_merge(+Nodes, -Bag, -NodesOut) +% +% Leaf nodes with lowest score is merged with similar sibblings. + +leaf_merge(T, Score, R, Parent, Bag, Rest) :- + Bag = Score1-node([R|Sibblings], Parent, []), + sibblings(T, Score, Parent, Sibblings, Rest), + length(Sibblings, Count), + rdf_label(R, Label), + debug(tree_reduce, 'merge ~w: ~w sibblings found', [Label, Count]), + Sibblings \== [], + Score1 = Score. + %Score1 is Score*2. + +%% leaf_collapse(+Nodes, +R, +Parent, -Reduced) +% +% Reduced contains all Nodes and Parent is merged with R. + +leaf_collapse(Nodes, R, Parent, Rest) :- + rdf_label(R, Label), + rdf_label(Parent, PLabel), + debug(tree_reduce, 'collapse ~w with ~w', [Label, PLabel]), + %Rest = Nodes. + merge_nodes(Nodes, Parent, R, Rest). + +merge_nodes([Score-node(Rs,P,C)|T], R, Add, [Score-Node|T]) :- + Node = node(Merged,P,C), + ( is_list(Rs), + member(R, Rs) + -> ( is_list(Add) + -> append(Rs,Add,Merged) + ; append(Rs,[Add],Merged) + ) + ; R == Rs + -> ( is_list(Add) + -> Merged = [R|Add] + ; Merged = [R,Add] + ) + ), + !. +merge_nodes([Node|T], R, Add, [Node|Rest]) :- + merge_nodes(T, R, Add, Rest). + + + +%tree_reduce(Nodes, Max, Reduced) :- +% node_abstract(Nodes, Merged), +% tree_reduce(Merged, Max, Reduced). + + +%% node_abstract(+NodesIn, -NodesOut) +% +% Node with lowest score in NodesIn is folded with parent. +/* +node_abstract([_Score-Node|Rest], Rest) :- + Node = node(_, _, []), + !. +node_abstract([_Score-Node0|T], [Score-Node|Rest]) :- + Node0 = node(R, P, C), + Node = node(Rs, P, []), + add_children(T, P, C, Rest0), + replace_parent(Rest0, R, P, Rest). +*/ + +add_children([Score-Node0|T], R, C, [Score-Node|T]) :- + Node0 = node(R, P, C0), + Node = node(R, P, Children), + append(C, C0, Children), + !. +add_children([Node|T], P, C, [Node|Rest]) :- + add_children(T, P, C, Rest). + +replace_parent([], _, _, []). +replace_parent([Score-Node0|T], P0, P, [Score-Node|Rest]) :- + Node0 = node(R,P0,C), + !, + Node = node(R,P,C), + replace_parent(T, P0, P, Rest). +replace_parent([Node|T], P0, P, [Node|Rest]) :- + replace_parent(T, P0, P, Rest). + + + + +sibblings([S-node(R,P,[])|T], S, P, Sibblings, Rest) :- !, + ( is_list(R) + -> append(R,Rs,Sibblings) + ; Sibblings = [R|Rs] + ), + sibblings(T, S, P, Rs, Rest). +sibblings([S-Node|T], S, P, Sibblings, [S-Node|Rest]) :- !, + sibblings(T, S, P, Sibblings, Rest). +sibblings(T, _, _, [], T). + + + + +tree_test(Keyword, Max, Tree) :- + do_query(Keyword, State), + concepts_in_search_state(State, Nodes), + tree_abstract(Nodes, Max, Reduced), + nodes_to_tree(Reduced, Tree). + %nodes_to_clusters(Reduced, Clusters). + + +do_query(Keyword, State) :- + search_api:basic_filter([], Filter), + graph_search(Keyword, State, [filter(Filter),prune(true),literal_threshold(0)]). + + + + + +tree0(node(a,0,[ + node(b,0,[ + node(c1,0.2,[ + node(d1,1,[]), + node(d2,1,[]) + ]), + node(c2,0,[ + node(d3,0.5,[ + node(e1,0.2,[]) + ]), + node(d4,0.5,[]), + node(d5,0.5,[]) + ]) + ]) + ])). + + +nodes_to_clusters([], []). +nodes_to_clusters([Score-node(R,P,Children)|T], [cluster(Score,Label,Count)|Rest]) :- + node_label(R, P, Label), + length(Children, Count), + nodes_to_clusters(T, Rest). + + +nodes_to_tree(Nodes, node(root, [], Children)) :- + children_of_node(Nodes, root, Nodes, Children). + +children_of_node([], _, _, []). +children_of_node([Score-node(Rs,Parent,_)|T], Parent, Nodes, [Node|Rest]) :- !, + ( Rs = [R|_] + -> true + ; Rs = R + ), + Node = node(R, [score(Score),label(Label)], Children), + node_label(R, Parent, Label), + children_of_node(Nodes, R, Nodes, Children), + children_of_node(T, Parent, Nodes, Rest). +children_of_node([_|T], Parent, Nodes, Rest) :- + children_of_node(T, Parent, Nodes, Rest). + + +node_label(R, Parent, Label) :- + ( is_list(R) + -> rdfs_label(Parent,L), + atom_concat('specific ', L, Label) + ; rdfs_label(R, Label), ! + ). + + +%% nodes(+Tree, -Pairs:score-node) is det +% +% Pairs is a list of score-node pairs contained in Tree. + +nodes(Tree, Leaves) :- + nodes(Tree, root, Leaves, []). + +nodes(node(N,S,[]), P, [S-node(N,P,[])|T], T) :- !. +nodes(node(N,S,Children), P, [S-node(N,P,Rs)|Nodes], T) :- + maplist(node_resource, Children, Rs), + children_nodes(Children, N, Nodes, T). + +children_nodes([], _, T, T). +children_nodes([Node|Ns], P, Leaves, T) :- + nodes(Node, P, Leaves, L), + children_nodes(Ns, P, L, T). + +node_resource(node(R,_,_), R). + +%% leaves(+Tree, -Pairs:score-leaf) is det +% +% Pairs is a list of score-leaf pairs contained in Tree. + +leaves(Tree, Leaves) :- + leaves(Tree, root, Leaves, []). + +leaves(node(N,S,[]), P, [S-node(N,P)|T], T) :- !. +leaves(node(N,_,Children), _, Leaves, T) :- + children_leaves(Children, N, Leaves, T). + +children_leaves([], _, T, T). +children_leaves([Node|Ns], P, Leaves, T) :- + leaves(Node, P, Leaves, L), + children_leaves(Ns, P, L, T). + + +%% leaf_count(+Tree, -Count) is det +% +% Count is the number of leafs in Tree. + +leaf_count(node(_,_,[]), 1) :- !. +leaf_count(node(_,_,Children), C) :- + children_leaf_count(Children, C). + +children_leaf_count([], 0). +children_leaf_count([Node|Ns], C) :- + leaf_count(Node, C1), + children_leaf_count(Ns, C2), + C is C1+C2. + + +