:- module(cluster_search,
          [ cluster_search/2,
            reply_clusters/1,
            organize_resources/3
          ]).

% http library
:- use_module(library(http/http_dispatch)).
:- use_module(library(http/http_json)).
:- use_module(library(http/http_parameters)).

% util modules
:- use_module(library(semweb/rdf_db)).
:- use_module(library(cluster_search/rdf_cluster)).
:- use_module(library(cluster_search/rdf_search)).
:- use_module(library(cluster_search/settings)).

% search modules
:- use_module(library(cluster_search/graph_search)).

:- http_handler(cliopatria(cluster_search_api), cluster_search_api, []).

:- rdf_meta
    class_of(r, -).

%%  cluster_search_api(+Request)
%
%   HTTP handler for the cluster search API. Reads the URL
%   parameters from Request, runs cluster_search/2 with them and
%   emits the resulting clusters through reply_clusters/1.

cluster_search_api(Request) :-
    get_parameters(Request, Params),
    cluster_search(Found, Params),
    reply_clusters(Found).

%%  get_parameters(+Request, -Options)
%
%   Options is the list [query(Query), target(Target)] taken from
%   the URL parameters of Request. `query` is mandatory; `target`
%   defaults to the value of the setting
%   cluster_search:basic_search_target.

get_parameters(Request, Options) :-
    setting(cluster_search:basic_search_target, DefaultTarget),
    http_parameters(Request,
                    [ query(Query,
                            [ description('Entered Query'),
                              optional(false)
                            ]),
                      target(Target,
                             [ default(DefaultTarget)
                             ])
                    ]),
    Options = [query(Query), target(Target)].

%%  cluster_search(-Clusters, +Options)
%
%   Runs graph_search/3 for the query(Query) and target(Target)
%   found in Options, using fixed search options (spo graph output,
%   edge limit 30, threshold 0.001), and hands the resulting search
%   state to organize_resources/3, grouping by path.

cluster_search(Clusters, Options) :-
    option(query(Query), Options),
    option(target(Target), Options),
    debug(query, 'cluster_search Query: ~p', [Query]),
    graph_search(Query, SearchState,
                 [ target(Target),
                   graphOutput(spo),
                   edge_limit(30),
                   threshold(0.001)
                 ]),
    organize_resources(SearchState, Clusters, [groupBy(path)]).
%%  organize_resources(+State, -Data, +Options)
%
%   Data is a term clusters(Clusters), where Clusters is a list of
%   Key-Items pairs obtained by grouping the targets of the search
%   State. Options processed here:
%
%     * start(+Start)
%     Index of the first item kept in every cluster (default 0).
%
%     * end(+End)
%     Index just past the last item kept in every cluster
%     (default 100).
%
%     * groupBy(+GroupBy)
%     Abstraction passed to abstract_path/3 (default `path`).

organize_resources(State, clusters(Clusters), Options) :-
    option(start(Start), Options, 0),
    option(end(End), Options, 100),
    rdf_search_property(State, targets(Targets)),
    search_graph(State, SearchGraph),
    group_targets_by_value(Targets, SearchGraph, Clusters0, Options),
    cluster_select(Clusters0, Start, End, Clusters).

%   search_graph(+State, -Graph)
%
%   Graph is the graph property of State, or [] when State has no
%   such property.

search_graph(State, Graph) :-
    rdf_search_property(State, graph(Graph)),
    !.
search_graph(_, []).

%%  group_targets_by_value(+Targets, +SearchGraph, -Clusters,
%%                         +Options)
%
%   For every target a path is generated. The resulting Key-Value
%   pairs are sorted on their key and values with the same key are
%   grouped together, so Clusters is a list of Key-Values pairs.
%   The groupBy option defaults to `path`, which is the value
%   cluster_search/2 passes, so that a missing option no longer
%   makes the predicate fail silently.

group_targets_by_value(Targets, Graph, Clusters, Options) :-
    option(groupBy(GroupBy), Options, path),
    setting(cluster_search:search_path, Method),
    result_paths(Targets, Method, GroupBy, Graph, Pairs0),
    keysort(Pairs0, Pairs),
    group_pairs_by_key(Pairs, Clusters).

%%  result_paths(+Targets, +Method, +Abstract, +Graph, -Items) is det.
%
%   Add path between resource and query. Items holds one pair per
%   target: AbstractPath-(Target-Path) when a path was found and
%   could be abstracted, other-Target otherwise.

result_paths(Targets, Method, Abstract, Graph, Items) :-
    empty_path_cache(Cache),
    cached_result_paths(Targets, Method, Abstract, Graph, Cache, _, Items).

%   cached_result_paths(+Targets, +Method, +Abstract, +Graph,
%                       +CacheIn, -CacheOut, -Pairs)
%
%   Worker for result_paths/5 that threads the path cache through
%   the list of targets.

cached_result_paths([], _, _, _, Cache, Cache, []).
cached_result_paths([Target|T], Method, Abstract, Graph,
                    CacheIn, CacheOut, [Pair|Pairs]) :-
    target(Target, URI, _Score),
    (   cached_search_path(Method, URI, Graph, CacheIn, CacheFound, Path0),
        abstract_path(Abstract, Path0, Path)
    ->  Pair = Path-(Target-Path0),
        CacheTmp = CacheFound
    ;   % Bindings made in a failed condition are undone, so the
        % cache must explicitly fall back to the incoming one.
        % Otherwise the remaining targets would be processed with
        % an unbound cache.
        Pair = other-Target,
        CacheTmp = CacheIn
    ),
    cached_result_paths(T, Method, Abstract, Graph, CacheTmp, CacheOut, Pairs).

%%  abstract_path(+Type, +Path, -Abstract) is semidet.
%
%     * Type = path
%     Only abstract target.
%
%     * Type = spath
%     Abstract full path to schema level
%
%   Fails for any other Type and for an empty Path with Type =
%   path; cached_result_paths/7 then files the target under `other`.

abstract_path(path, [R|Rest0], Path) :-
    cluster_abstract_class(R, Class),
    !,
    partial_schema_path([Class|Rest0], Path).
abstract_path(spath, Path, SPath) :-
    !,
    (   Path = [R]
    ->  cluster_abstract_class(R, Class),
        SPath = [Class]
    ;   strip_alignment(Path, Path1),
        schema_path(Path1, SPath0),
        canonical_path(SPath0, SPath)
    ).

%%  cluster_abstract_class(+Class, -Abstract) is det.
%
%   True if Abstract is a high-level class for Class. Currently
%   this is simply the class found by class_of/2.

cluster_abstract_class(Resource, Class) :-
    class_of(Resource, Class).

%%  class_of(+ResourceOrLiteral, -Class:atom) is det.
%
%   Class is the class of ResourceOrLiteral. Returns rdfs:Literal if
%   ResourceOrLiteral is a literal and rdfs:Resource if
%   ResourceOrLiteral has no explicit class. Only the first
%   rdf:type found is used.

class_of(literal(_), C) :-
    !,
    rdf_equal(C, rdfs:'Literal').
class_of(O, C) :-
    rdf_has(O, rdf:type, C),
    !.
class_of(_, Resource) :-
    rdf_equal(Resource, rdfs:'Resource').

%%  cluster_select(+Clusters, +Start, +End, -ReducedClusters)
%
%   ReducedClusters contains, for every cluster in Clusters, only
%   the items starting at Start and ending before End.

cluster_select([], _, _, []).
cluster_select([C-Elems|T], Start, End, [C-Reduced|Rest]) :-
    elem_select(Elems, Start, End, Reduced),
    cluster_select(T, Start, End, Rest).

%%  elem_select(+List, +Start, +End, -SubList)
%
%   SubList contains the elements of List from index Start
%   (0-based) up to, but not including, index End. The whole List
%   is returned if Start > End. If Start lies at or beyond the end
%   of List, SubList is [] instead of the predicate failing, so a
%   short cluster no longer makes cluster_select/4 fail as a whole.

elem_select(Items, Start, End, Items) :-
    Start > End,
    !.
elem_select(Items, Start, End, Selected) :-
    length(Items, Total),
    (   Start >= Total
    ->  Selected = []
    ;   length(Skipped, Start),
        append(Skipped, Rest, Items),
        (   End >= Total
        ->  Selected = Rest
        ;   Count is End - Start,
            length(Selected, Count),
            append(Selected, _, Rest)
        )
    ).

%%  target(+Target, -URI, -Score).
%
%   Split up target. A target that is not a Score-URI pair is
%   taken to be a bare URI with score 1.

target(Score-URI, URI, Score) :-
    !.
target(URI, URI, 1).

%%  reply_clusters(+Data)
%
%   Write Data, a term clusters(Clusters), in JSON format to the
%   output stream as an object with a single `clusters` member.

reply_clusters(clusters(Clusters)) :-
    clusters_to_json(Clusters, JsonClusters),
    Json = json([clusters=JsonClusters]),
    reply_json(Json).
%%  clusters_to_json(+Clusters, -JsonResults)
%
%   Converts clusters to prolog terms appropriate for json output.
%   Every Path-Items cluster becomes a json/1 term holding the
%   path, the number of items and the converted items.

clusters_to_json([], []).
clusters_to_json([Cluster|Clusters], [JsonCluster|JsonResults]) :-
    cluster_info(Cluster, Path, ClusterItems),
    length(ClusterItems, NumberOfResults),
    items_to_json(ClusterItems, JsonItems),
    JsonCluster = json([ path=Path,
                         results=NumberOfResults,
                         items=JsonItems
                       ]),
    clusters_to_json(Clusters, JsonResults).

%   cluster_info(+Cluster, -Path, -Items)
%
%   Split a Path-Items cluster into its two parts.

cluster_info(Path-Items, Path, Items).

%%  items_to_json(+Items, -JsonResults)
%
%   Converts items to prolog terms appropriate for json output.
%   Each item becomes json([uri=Uri]). See item_uri/2 for the item
%   shapes that are accepted.

items_to_json([], []).
items_to_json([Item|Items], [json([uri=Uri])|JsonItems]) :-
    item_uri(Item, Uri),
    items_to_json(Items, JsonItems).

%   item_uri(+Item, -Uri)
%
%   Uri is the resource of a cluster item. Clusters keyed by a
%   path hold Target-Path items, whereas the `other` cluster holds
%   plain targets; a target is either Score-Uri or a bare Uri. The
%   original code only accepted Score-Uri-Path, so converting an
%   `other` cluster failed. Clause order matters: the fully
%   specified shape is tried first so existing results are
%   unchanged.

item_uri(_Score-Uri-_Path, Uri) :-      % scored target with path
    !.
item_uri(Uri-Path, Uri) :-              % unscored target with path
    is_list(Path),
    !.
item_uri(_Score-Uri, Uri) :-            % scored target, no path
    !.
item_uri(Uri, Uri).                     % bare resource