cluster_search/commit
Added debugging messages and new analytics code.
author | Chris Dijkshoorn |
---|---|
Tue Jan 21 15:19:21 2014 +0100 | |
committer | Chris Dijkshoorn |
Tue Jan 21 15:19:21 2014 +0100 | |
commit | 7fcad6a0a373c89f649e3f3ddbd700375da1b7fe |
tree | 0e593775ea1d5009786f1f3ca73099528fa152a5 |
parent | e3e5c82aa930fefc49a1f50b577c1aa2ca3f3849 |
Diff style: patch stat
diff --git a/api/cluster_search.pl b/api/cluster_search.pl index 453cd19..01ea3b9 100644 --- a/api/cluster_search.pl +++ b/api/cluster_search.pl @@ -77,6 +77,7 @@ get_parameters(Request, Options) :- % including the search graph. cluster_search(Clusters, Options) :- option(query(Query), Options), + debug(query, 'cluster_search Query: ~p', [Query]), SearchOptions = [graphOutput(spo)], graph_search(Query, State, SearchOptions), rdf_search_property(State, targets(SearchResults)), diff --git a/lib/cluster_search/graph_search.pl b/lib/cluster_search/graph_search.pl index c4e66e6..07e5386 100644 --- a/lib/cluster_search/graph_search.pl +++ b/lib/cluster_search/graph_search.pl @@ -42,11 +42,9 @@ graph_search(Keyword, State, Options) :- graph_search(Keyword, State, Options) :- check_cache_validity, term_hash(Keyword+Options, Key), - ( graph_search_cache(Key, Keyword, Options, State), - debug(graph_search, 'graph_search Keyword: ~p, in cache', [Keyword]) + ( graph_search_cache(Key, Keyword, Options, State) -> true - ; debug(graph_search, 'graph_search Keyword: ~p, caching', [Keyword]), - do_graph_search(Keyword, State, Options), + ; do_graph_search(Keyword, State, Options), push_graph_search_cache(Key, Keyword, Options, State) ). diff --git a/lib/cluster_search/kwd_search.pl b/lib/cluster_search/kwd_search.pl index 54df40d..767bacd 100644 --- a/lib/cluster_search/kwd_search.pl +++ b/lib/cluster_search/kwd_search.pl @@ -26,7 +26,6 @@ match_string/7, % +Atom, +Range, +Threshold, +Map, -URI, -Score, -Path search_string/6, % +Atom, +Range, +Threshold, +Map, -Score, -Path search_graph/7, % +From, +Range, +Score0, +Threshold, +Map, -Score, -Path - find_literals/3 % +Search, +Threshold, -Literals ]). :- use_module(library(lists)). @@ -219,14 +218,14 @@ op_pairs([P,O|T0], [O-P|T]) :- find_literals(Search, Literals, Options) :- option(threshold(Threshold), Options, 0.0), - ( rdf_tokenize_literal(Search, Tokens) + ( rdf_tokenize_literal(Search, Tokens) -> true ; Tokens = [Search] % HACK ), all_literals(Tokens, Literals0), sort(Literals0, Literals1), length(Literals1, NL1), - debug(search, '~D matches', [NL1]), + debug(find_literals, '~D matches', [NL1]), sort_matches(Literals1, Tokens, Threshold, Literals). all_literals(Tokens, Literals) :- @@ -238,7 +237,10 @@ all_literals(Tokens, Literals) :- ; tokenize_hits(Ls0, Tokenized), filter_compounds(Compounds, Tokenized, KeyedLiterals), pairs_values(KeyedLiterals, Literals) - ). + ), + debug(find_literals, 'Step1A: Porter stem Query.', []), + debug(find_literals, 'Step1B: Remove stop words.', []), + debug(find_literals, 'Step1C: Retrieve list of literals containing result 1B', []). %% compound_search_tokens(+Tokens, -Compounds:list(list), -AllTokens) % @@ -311,9 +313,11 @@ same_tokens([H0|T0], [H1|T]) :- % the same as Matches. Should be cleaned. sort_matches(Set0, Search, Threshold, Set) :- - tag_match_score(Set0, Search, Threshold, Tagged), - keysort(Tagged, Set1), - reverse(Set1, Set). + debug(find_literals, 'Step1D: Score literals',[]), + tag_match_score(Set0, Search, Threshold, Tagged), + debug(find_literals, 'Step1E: Sort list of literals by score',[]), + keysort(Tagged, Set1), + reverse(Set1, Set). %% tag_match_score(+Set:list(atom), +Search, +Threshold, -Result:list(Score-Atom)) @@ -350,7 +354,8 @@ match_score(Search, Literal, Score) :- tokens(Search, SearchTokens), tokens(Literal, LiteralTokens), ( SearchTokens == LiteralTokens - -> Score = 1 + -> Score = 1, + debug(find_literals, 'Step1D1 Rate: SearchTokens ~p, LiteralTokens ~p, perfect match Score 1.', [SearchTokens, LiteralTokens]) ; literal_distance(SearchTokens, LiteralTokens, LD) -> Score is 3/(3+LD) ; add_stems(SearchTokens, SearchStems), diff --git a/lib/cluster_search/rdf_backward_search.pl b/lib/cluster_search/rdf_backward_search.pl index 3437213..c3e747d 100644 --- a/lib/cluster_search/rdf_backward_search.pl +++ b/lib/cluster_search/rdf_backward_search.pl @@ -37,6 +37,7 @@ rdf_backward_search(Keyword, TargetCond, State, Options) :- -> Steps = -1 ; Steps = Steps0 ), + debug(query, 'rdf_backward_search Query: ~p', [Keyword]), rdf_keyword_search(Keyword, TargetCond, State, [expand_node(Expand)|Options]), steps(0, Steps, State). diff --git a/lib/cluster_search/rdf_search.pl b/lib/cluster_search/rdf_search.pl index 47efd26..3ae661c 100644 --- a/lib/cluster_search/rdf_search.pl +++ b/lib/cluster_search/rdf_search.pl @@ -99,7 +99,6 @@ rdf_keyword_search(Keyword, TargetCond, State, Options) :- % % Initiate a search-graph state. % Options see rdf_keyword_search/4 - rdf_init_state(TargetCond, State, Options) :- new_search_graph(Graph), empty_assoc(Start), @@ -113,21 +112,31 @@ rdf_init_state(TargetCond, State, Options) :- meta_options(rdf_search:is_meta, Options, MetaOptions), set_state_fields(MetaOptions, State0, State, _RestOptions). - -rdf_start_search(R, State) :- - ( is_list(R) - -> Rs = R - ; is_resource(R) - -> Rs = [R] +%% rdf_start_search(+Query, -State) +% +% Starts the search based on a query consisting of a: +% +% * List of Resources +% * Resource +% * String +rdf_start_search(Query, State) :- + ( is_list(Query) + -> Resources = Query + ; is_resource(Query) + -> Resources = [Query] ), !, + debug(query, 'rdf_start_search Resource to query for ~p', [Resources]), state_start(State, Start0), state_graph(State, Graph), - add_resources(Rs, Start0, Graph, Start, Links), + add_resources(Resources, Start0, Graph, Start, Links), set_start_of_state(Start, State), add_hits(Links, State). -rdf_start_search(Search, State) :- +rdf_start_search(Query, State) :- state_literal_threshold(State, Threshold), - find_literals(Search, Literals, [threshold(Threshold)]), + debug(graph_search_algorithm, 'Step 1: Find Literals based on query: ~p with treshold: ~p', [Query, threshold(Threshold)]), + find_literals(Query, Literals, [threshold(Threshold)]), + length(Literals, Length), + debug(graph_search_algorithm, 'Result Step 1: ~p resulting literals with scores: ~p', [Length, Literals]), state_graph(State, Graph), state_start(State, Start0), ( state_literal_score(State, false) diff --git a/lib/cluster_search/search_statistics.pl b/lib/cluster_search/search_statistics.pl index f001195..95dcf98 100644 --- a/lib/cluster_search/search_statistics.pl +++ b/lib/cluster_search/search_statistics.pl @@ -132,15 +132,15 @@ get_statistics_ulan(dataset_statistics(ulan, 'ULAN', UlanStatistics)) :- % % Generates a list of statics regarding the AAT get_statistics_aat(dataset_statistics(aat, 'AAT', AATStatistics)) :- - findall(Person, rdf(Person, rdf:type, rma:'Person'), PersonList), - length(PersonList, NumberOfPersons), + findall(Concept, rdf(Concept, rdf:type, rma:'Person'), ConceptList), + length(ConceptList, NumberOfConcepts), findall(Maker, rdf(_Work, rma:maker, Maker), MakerList), length(MakerList, NumberOfMakers), list_to_set(MakerList, DistinctMakers), length(DistinctMakers, NumberOfDistinctMakers), debug(statistics, 'Retrieved statistics aat', []), AATStatistics = - [['total number of people in thesaurus', NumberOfPersons], + [['total number of concepts in thesaurus', NumberOfConcepts], ['total number of maker annotations added to artworks', NumberOfMakers], ['number of distinct makers used in Rijksmuseum collection', NumberOfDistinctMakers]]. diff --git a/web/js/analytics.js b/web/js/analytics.js index d166ff5..ee3da6e 100644 --- a/web/js/analytics.js +++ b/web/js/analytics.js @@ -1,7 +1,7 @@ (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){ -(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o), -m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m) + (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o), + m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m) })(window,document,'script','//www.google-analytics.com/analytics.js','ga'); -ga('create', 'UA-42124949-1', 'vu.nl'); +ga('create', 'UA-47334744-1', 'vu.nl'); ga('send', 'pageview'); \ No newline at end of file