vumix/commit

fix first version of probability tag ranking

authorMichiel Hildebrand
Thu May 24 15:14:31 2012 +0200
committerMichiel Hildebrand
Thu May 24 15:14:31 2012 +0200
commitae4af67c8e2668a1f911a4ec61cd5e58f08a0881
tree42b21a3a3be11a199e05f28166f79de6277a33be
parent486a2c114fb4a6199e241a4d6445abd70b40fdb0
Diff style: patch stat
diff --git a/lib/ptr.pl b/lib/ptr.pl
index 7633f16..5e79151 100644
--- a/lib/ptr.pl
+++ b/lib/ptr.pl
@@ -5,7 +5,7 @@
 
 :- use_module(library(video_annotation)).
 :- use_module(library(stop_words)).
-:- use_module(library('R')).
+:- use_module(library(real)).
 
 /* Probabilistic tag relevance
 
@@ -13,68 +13,116 @@ http://www2009.eprints.org/36/1/p351.pdf
 
 */
 
-:- dynamic
-	v_tag/2.
-
 ptr_rank(Video, RankedTags) :-
-	findall(T, video_tag(Video, literal(T)), Tags0),
+	set_prolog_stack(global, limit(512000000)),
+	findall(T, matched_video_tag(Video, literal(T)), Tags0),
 	sort(Tags0, Tags1),
 	remove_stop_words(Tags1, dutch, Tags),
 	length(Tags, TagCount),
-	debug(ptr, 'tags: ~w', TagCount),
+	debug(ptr, 'video: ~w, tags: ~w', [Video,TagCount]),
 	ptr_probs(Tags, Video, TagCount, Probs),
-	pairs_keys_values(Probs, Tags, Pairs),
+	pairs_keys_values(Pairs, Probs, Tags),
 	keysort(Pairs, Pairs1),
 	reverse(Pairs1, RankedTags).
 
 ptr_probs([], _, _, []).
 ptr_probs([Tag|Tags], Video, N, [Prob|Probs]) :-
 	N1 is N - 1,
-	debug(ptr, '~w ~w', [N, Tag]),
+	debug(ptr_tag, '~w ~w', [N, Tag]),
 	ptr(Video, Tag, Prob),
-	debug(ptr, '~f2~n', [Prob]),
+	debug(ptr_tag, '~f2~n', [Prob]),
+	debug(ptr_result, '~w ~f2', [Tag,Prob]),
 	ptr_probs(Tags, Video, N1, Probs).
 
 ptr(Video, Tag, Probability) :-
-	findall(V, video_tag(V, literal(Tag)), Neighbour_Videos0),
+	findall(V, matched_video_tag(V, literal(Tag)), Neighbour_Videos0),
 	sort(Neighbour_Videos0, Neighbour_Videos),
 	length(Neighbour_Videos, V_Count),
-	debug(ptr, 'videos: ~w', [V_Count]),
+	debug(ptr_tag, 'videos: ~w', [V_Count]),
 	tag_vectors([Video|Neighbour_Videos], [V_Vector|N_Vectors]),
-	kde(N_Vectors, V_Vector, Probability).
+	median_distance([V_Vector|N_Vectors], Median),
+	debug(ptr_tag, 'median ~2f', [Median]),
+	kde(N_Vectors, V_Vector, V_Count, Median, Probability).
 
 
 tag_vectors(Videos, Vectors) :-
 	findall(Tag, (member(V,Videos),
-		      video_tag(V, literal(Tag)),
-		      assert(v_tag(V,Tag))
+		      matched_video_tag(V, literal(Tag))
 		     ),
 		Tags0),
 	sort(Tags0, Tags1),
 	remove_stop_words(Tags1, dutch, Tags),
-	maplist(video_vector(Tags), Videos, Vectors),
-	retractall(v_tag(_,_)).
-
-
-
-
-video_vector([], _, []).
-video_vector([Tag|Tags], Video, [N|Vs]) :-
-	(   v_tag(Video, Tag)
-	->  N = 1
-	;   N = 0
-	),
-	video_vector(Tags, Video, Vs).
-
-kde(Neighbours, X_List, Prob) :-
-	length(X_List, Length),
-	r_open,
-	X =.. [c|X_List],
-	maplist(k(X, Length), Neighbours, Probs),
-	sumlist(Probs, Prob),
-	r_close.
+	length(Tags, Tag_Count),
+	debug(ptr_tag, 'vector length ~w', [Tag_Count]),
+	video_vectors(Videos, Tags, Vectors),
+	debug(ptr_tag, 'created vectors', []).
+
+video_vectors([], _, []).
+video_vectors([V|Vs], Tags, [Vector|Rest]) :-
+	findall(Tag, matched_video_tag(V, literal(Tag)), VTags0),
+	sort(VTags0, VTags1),
+	remove_stop_words(VTags1, dutch, VTags),
+	v_zip(Tags, VTags, Vector),
+	video_vectors(Vs, Tags, Rest).
+
+v_zip([], [], []).
+v_zip([Tag|Ts], [Tag|VTs], [1|Vs]) :-
+	!,
+	v_zip(Ts, VTs, Vs).
+v_zip([_Tag|Ts], VTs, [0|Vs]) :-
+	v_zip(Ts, VTs, Vs).
+
+
+kde(Neighbours, X, Video_Count, Median, Prob) :-
+	x <- X,
+	maplist(k(Median), Neighbours, Ks),
+	sumlist(Ks, K),
+	Prob is (1/Video_Count) * K.
+
+k(Median, N, K) :-
+	n <- N,
+	% there is a bug in real R that makes (x-n)^2 into x-(n^2). I use abs() to explicitly force parenthesis
+	K <- exp(-(sqrt(sum(abs((x-n))^2))^2)/Median^2).
+
+%	r_in( Prob <- exp(-(sum(sqrt(abs((X - N))^2))^2)/ (Length/4)^2)
+%	).
+
+
+median_distance(Vs,Median) :-
+	perm_d(Vs, Ds),
+	length(Ds, Length),
+	sum_list(Ds, Total),
+	Median is (Total/Length)+1.
+
+perm_d([], []).
+perm_d([V|Vs], Ds) :-
+	maplist(v_distance(V), Vs, D0),
+	perm_d(Vs, D1),
+	append(D0, D1, Ds).
+
+
+v_distance(V1,V2,D) :-
+	v1 <- V1,
+	v2 <- V2,
+	D <- sqrt(sum(abs(v1-v2)^2)).
+
+%%	matched_video_tag(?Video, ?Tag)
+%
+%	Tag is an annotation of Video.
+
+matched_video_tag(Video, Tag) :-
+	ground(Tag),
+	!,
+	rdf(AnnotationId, rdf:value, Tag),
+	rdf(Video, pprime:hasAnnotation, AnnotationId),
+	rdf(AnnotationId, pprime:score, literal(ScoreA)),
+	atom_number(ScoreA, Score),
+	Score > 5.
+matched_video_tag(Video, Tag) :-
+	rdf(Video, pprime:hasAnnotation, AnnotationId),
+	rdf(AnnotationId, rdf:value, Tag),
+	rdf(AnnotationId, pprime:score, literal(ScoreA)),
+	atom_number(ScoreA, Score),
+	Score > 5.
 
-k(X, Length, N_List, Prob) :-
-	N =.. [c|N_List],
-	r_in( Prob <- exp(-(sum(sqrt(abs((X - N))^2))^2)/ (Length/4)^2) ).
 
diff --git a/lib/r_exp.pl b/lib/r_exp.pl
index aa0fbaa..b47f844 100644
--- a/lib/r_exp.pl
+++ b/lib/r_exp.pl
@@ -1,7 +1,7 @@
 :- module(r_exp,
 	  []).
 
-:- use_module(library('R')).
+:- use_module(library(real)).
 :- use_module(library(tag_concept)).
 :- use_module(library(video_annotation)).
 :- use_module(library(stop_words)).
diff --git a/lib/tag_concept.pl b/lib/tag_concept.pl
index fc7bcc4..18ec6b8 100644
--- a/lib/tag_concept.pl
+++ b/lib/tag_concept.pl
@@ -336,7 +336,7 @@ list_concepts(Id, Type, Goal) :-
 		 *               C		*
 		 *******************************/
 
-:- use_module(library('R')).
+:- use_module(library(real)).
 
 
 concept_rank(Video, Ranked) :-
@@ -416,36 +416,36 @@ interpretation_graph(Tags, Goal, Scheme, IGraph, Weights) :-
 	pairs_keys_values(Pairs, IGraph, Weights).
 
 graph_cluster(Vector, Clusters) :-
-	r_open,
-	r_lib(igraph),
+	%r_open,
+	<- library(igraph),
 	v <- Vector,
 	g <- graph(v),
-	Clusters <- 'clusters(g)$membership',
-	r_close.
+	Clusters <- 'clusters(g)$membership'.
+	%r_close.
 
 page_rank(Vector, Weights, Rank) :-
-	r_open,
-	r_lib(igraph),
+	%r_open,
+	<- library(igraph),
 	v <- Vector,
 	w <- Weights,
 	g <- graph(v),
 	%r_print( g ),
-	Rank <- 'page.rank(g, weights = w)$vector',
+	Rank <- 'page.rank(g, weights = w)$vector'.
 	%r_in( 'page.rank'(g, vids = 'V'(g), directed = 'TRUE', damping = 0.2,
 		%	  weights = 'NULL', options = 'igraph.arpack.default') ),
-	r_close.
+	%r_close.
 
 show_graph(Vector) :-
-	r_open,
-	r_lib(igraph),
+	%r_open,
+	<- library(igraph),
 	v <- Vector,
 	g <- graph(v),
-	r_print( g ),
-	r_in( tkplot( g ) ),
+	<- print(g),
+	<- tkplot( g ),
 	write( 'Press Return to continue...' ), nl,
 	read_line_to_codes( user_input, _ ),
-	r_print( 'dev.off()' ),
-	r_close.
+	<- print( 'dev.off()' ).
+	%r_close.
 
 
 expand_graph(C, C1, 0.1) :-