cluster_search/commit

Added debugging messages and new analytics code.

author Chris Dijkshoorn
Tue Jan 21 15:19:21 2014 +0100
committer Chris Dijkshoorn
Tue Jan 21 15:19:21 2014 +0100
commit 7fcad6a0a373c89f649e3f3ddbd700375da1b7fe
tree 0e593775ea1d5009786f1f3ca73099528fa152a5
parent e3e5c82aa930fefc49a1f50b577c1aa2ca3f3849
Diff style: patch stat
diff --git a/api/cluster_search.pl b/api/cluster_search.pl
index 453cd19..01ea3b9 100644
--- a/api/cluster_search.pl
+++ b/api/cluster_search.pl
@@ -77,6 +77,7 @@ get_parameters(Request, Options) :-
 %	including the search graph.
 cluster_search(Clusters, Options) :-
     option(query(Query), Options),
+    debug(query, 'cluster_search Query: ~p', [Query]),
     SearchOptions = [graphOutput(spo)],
     graph_search(Query, State, SearchOptions),
     rdf_search_property(State, targets(SearchResults)),
diff --git a/lib/cluster_search/graph_search.pl b/lib/cluster_search/graph_search.pl
index c4e66e6..07e5386 100644
--- a/lib/cluster_search/graph_search.pl
+++ b/lib/cluster_search/graph_search.pl
@@ -42,11 +42,9 @@ graph_search(Keyword, State, Options) :-
 graph_search(Keyword, State, Options) :-
     check_cache_validity,
     term_hash(Keyword+Options, Key),
-    (   graph_search_cache(Key, Keyword, Options, State),
-	debug(graph_search, 'graph_search Keyword: ~p, in cache', [Keyword])
+    (   graph_search_cache(Key, Keyword, Options, State)
     ->  true
-    ;   debug(graph_search, 'graph_search Keyword: ~p, caching', [Keyword]),
-	do_graph_search(Keyword, State, Options),
+    ;   do_graph_search(Keyword, State, Options),
 	push_graph_search_cache(Key, Keyword, Options, State)
     ).
 
diff --git a/lib/cluster_search/kwd_search.pl b/lib/cluster_search/kwd_search.pl
index 54df40d..767bacd 100644
--- a/lib/cluster_search/kwd_search.pl
+++ b/lib/cluster_search/kwd_search.pl
@@ -26,7 +26,6 @@
 	    match_string/7,	% +Atom, +Range, +Threshold, +Map, -URI, -Score, -Path
 	    search_string/6,	% +Atom, +Range, +Threshold, +Map, -Score, -Path
 	    search_graph/7,	% +From, +Range, +Score0, +Threshold, +Map, -Score, -Path
-
 	    find_literals/3	% +Search, +Threshold, -Literals
 	  ]).
 :- use_module(library(lists)).
@@ -219,14 +218,14 @@ op_pairs([P,O|T0], [O-P|T]) :-
 
 find_literals(Search, Literals, Options) :-
 	option(threshold(Threshold), Options, 0.0),
-	(   rdf_tokenize_literal(Search, Tokens)
+	(      rdf_tokenize_literal(Search, Tokens)
 	->  true
 	;   Tokens = [Search]		% HACK
 	),
 	all_literals(Tokens, Literals0),
 	sort(Literals0, Literals1),
 	length(Literals1, NL1),
-	debug(search, '~D matches', [NL1]),
+	debug(find_literals, '~D matches', [NL1]),
 	sort_matches(Literals1, Tokens, Threshold, Literals).
 
 all_literals(Tokens, Literals) :-
@@ -238,7 +237,10 @@ all_literals(Tokens, Literals) :-
 	;   tokenize_hits(Ls0, Tokenized),
 	    filter_compounds(Compounds, Tokenized, KeyedLiterals),
 	    pairs_values(KeyedLiterals, Literals)
-	).
+	),
+	debug(find_literals, 'Step1A: Porter stem Query.', []),
+	debug(find_literals, 'Step1B: Remove stop words.', []),
+	debug(find_literals, 'Step1C: Retrieve list of literals containing result 1B', []).
 
 %%	compound_search_tokens(+Tokens, -Compounds:list(list), -AllTokens)
 %
@@ -311,9 +313,11 @@ same_tokens([H0|T0], [H1|T]) :-
 %		the same as Matches.  Should be cleaned.
 
 sort_matches(Set0, Search, Threshold, Set) :-
-	tag_match_score(Set0, Search, Threshold, Tagged),
-	keysort(Tagged, Set1),
-	reverse(Set1, Set).
+    debug(find_literals, 'Step1D: Score literals',[]),
+    tag_match_score(Set0, Search, Threshold, Tagged),
+    debug(find_literals, 'Step1E: Sort list of literals by score',[]),
+    keysort(Tagged, Set1),
+    reverse(Set1, Set).
 
 
 %%	tag_match_score(+Set:list(atom), +Search, +Threshold, -Result:list(Score-Atom))
@@ -350,7 +354,8 @@ match_score(Search, Literal, Score) :-
 	tokens(Search, SearchTokens),
 	tokens(Literal, LiteralTokens),
 	(   SearchTokens == LiteralTokens
-	->  Score = 1
+	->  Score = 1,
+	    debug(find_literals, 'Step1D1 Rate: SearchTokens ~p, LiteralTokens ~p, perfect match Score 1.', [SearchTokens, LiteralTokens])
 	;   literal_distance(SearchTokens, LiteralTokens, LD)
 	->  Score is 3/(3+LD)
 	;   add_stems(SearchTokens, SearchStems),
diff --git a/lib/cluster_search/rdf_backward_search.pl b/lib/cluster_search/rdf_backward_search.pl
index 3437213..c3e747d 100644
--- a/lib/cluster_search/rdf_backward_search.pl
+++ b/lib/cluster_search/rdf_backward_search.pl
@@ -37,6 +37,7 @@ rdf_backward_search(Keyword, TargetCond, State, Options) :-
 	->  Steps = -1
 	;   Steps = Steps0
 	),
+	debug(query, 'rdf_backward_search Query: ~p', [Keyword]),
 	rdf_keyword_search(Keyword, TargetCond, State,
 			   [expand_node(Expand)|Options]),
 	steps(0, Steps, State).
diff --git a/lib/cluster_search/rdf_search.pl b/lib/cluster_search/rdf_search.pl
index 47efd26..3ae661c 100644
--- a/lib/cluster_search/rdf_search.pl
+++ b/lib/cluster_search/rdf_search.pl
@@ -99,7 +99,6 @@ rdf_keyword_search(Keyword, TargetCond, State, Options) :-
 %
 %	Initiate a search-graph state.
 %	Options see rdf_keyword_search/4
-
 rdf_init_state(TargetCond, State, Options) :-
 	new_search_graph(Graph),
 	empty_assoc(Start),
@@ -113,21 +112,31 @@ rdf_init_state(TargetCond, State, Options) :-
 	meta_options(rdf_search:is_meta, Options, MetaOptions),
 	set_state_fields(MetaOptions, State0, State, _RestOptions).
 
-
-rdf_start_search(R, State) :-
-	(   is_list(R)
-	->  Rs = R
-	;   is_resource(R)
-	->  Rs = [R]
+%%	rdf_start_search(+Query, -State)
+%
+%	Starts the search based on a query consisting of a:
+%
+%	* List of Resources
+%	* Resource
+%	* String
+rdf_start_search(Query, State) :-
+	(   is_list(Query)
+	->  Resources = Query
+	;   is_resource(Query)
+	->  Resources = [Query]
 	),  !,
+	debug(query, 'rdf_start_search Resource to query for ~p', [Resources]),
 	state_start(State, Start0),
 	state_graph(State, Graph),
-	add_resources(Rs, Start0, Graph, Start, Links),
+	add_resources(Resources, Start0, Graph, Start, Links),
 	set_start_of_state(Start, State),
 	add_hits(Links, State).
-rdf_start_search(Search, State) :-
+rdf_start_search(Query, State) :-
 	state_literal_threshold(State, Threshold),
-	find_literals(Search, Literals, [threshold(Threshold)]),
+	debug(graph_search_algorithm, 'Step 1: Find Literals based on query: ~p with treshold: ~p', [Query, threshold(Threshold)]),
+	find_literals(Query, Literals, [threshold(Threshold)]),
+	length(Literals, Length),
+	debug(graph_search_algorithm, 'Result Step 1: ~p resulting literals with scores: ~p', [Length, Literals]),
 	state_graph(State, Graph),
 	state_start(State, Start0),
 	(   state_literal_score(State, false)
diff --git a/lib/cluster_search/search_statistics.pl b/lib/cluster_search/search_statistics.pl
index f001195..95dcf98 100644
--- a/lib/cluster_search/search_statistics.pl
+++ b/lib/cluster_search/search_statistics.pl
@@ -132,15 +132,15 @@ get_statistics_ulan(dataset_statistics(ulan, 'ULAN', UlanStatistics)) :-
 %
 %	Generates a list of statics regarding the AAT
 get_statistics_aat(dataset_statistics(aat, 'AAT', AATStatistics)) :-
-    findall(Person, rdf(Person, rdf:type, rma:'Person'), PersonList),
-    length(PersonList, NumberOfPersons),
+    findall(Concept, rdf(Concept, rdf:type, rma:'Person'), ConceptList),
+    length(ConceptList, NumberOfConcepts),
     findall(Maker, rdf(_Work, rma:maker, Maker), MakerList),
     length(MakerList, NumberOfMakers),
     list_to_set(MakerList, DistinctMakers),
     length(DistinctMakers, NumberOfDistinctMakers),
     debug(statistics, 'Retrieved statistics aat', []),
     AATStatistics =
-    [['total number of people in thesaurus', NumberOfPersons],
+    [['total number of concepts in thesaurus', NumberOfConcepts],
      ['total number of maker annotations added to artworks', NumberOfMakers],
      ['number of distinct makers used in Rijksmuseum collection', NumberOfDistinctMakers]].
 
diff --git a/web/js/analytics.js b/web/js/analytics.js
index d166ff5..ee3da6e 100644
--- a/web/js/analytics.js
+++ b/web/js/analytics.js
@@ -1,7 +1,7 @@
 (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
-(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
-m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
+	(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
+	m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
 })(window,document,'script','//www.google-analytics.com/analytics.js','ga');
 
-ga('create', 'UA-42124949-1', 'vu.nl');
+ga('create', 'UA-47334744-1', 'vu.nl');
 ga('send', 'pageview');
\ No newline at end of file