:- module(ptr,
	  [create_collection/0,
	   c_median/1,
	   ptr_rank/2
	  ]).

:- use_module(library(stop_words)).
:- use_module(library(real)).
:- use_module(library(tag_concept), [baseline_video/1,m_video/1,topN/4,video_mbh_terms/2]).
%:- use_module(video_vectors).

/* Probabilistic tag relevance

http://www2009.eprints.org/36/1/p351.pdf

*/

:- dynamic
	c_vector/2,
	median_cache/1.

:- set_prolog_stack(global, limit(1240000000)).

		 /*******************************
		 *   process collection		*
		 *******************************/

%%	create_collection
%
%	Create the collection on which we are going the run the PTR.

create_collection :-
	debug(ptr_collection),
	retractall(c_vector(_,_)),
	retractall(median_cache(_)),
	% collect videos and tags in the collection
	findall(V, m_video(V), Videos),
	collection_tags(Videos, Tags),
	% output some stats
	length(Videos, Video_Count),
	length(Tags, Tag_Count),
	debug(ptr_collection, '~w videos and ~w tags', [Video_Count, Tag_Count]),
	% create a vector for each video
	debug(ptr_collection, 'creating video vectors', []),
	video_vectors(Videos, Tags, VideoVectors),
	write_collection(VideoVectors).

write_collection(VideoVectors) :-
	File = 'video_vectors.pl',
	debug(ptr_collection, 'write vectors to ~w', [File]),
	open(File, write, Stream),
	(   member(Video-Vector, VideoVectors),
	    assert(c_vector(Video, Vector)),
	    portray_clause(Stream, c_vector(Video, Vector)),
	    fail
	;   true
	),
	close(Stream).

video_vectors([], _, []).
video_vectors([V|Vs], Tags, [V-Vector|Rest]) :-
	findall(Tag, matched_video_tag(V, literal(Tag)), VTags0),
	sort(VTags0, VTags1),
	remove_stop_words(VTags1, dutch, VTags),
	v_zip(Tags, VTags, Vector),
	video_vectors(Vs, Tags, Rest).

v_zip([], [], []).
v_zip([Tag|Ts], [Tag|VTs], [1|Vs]) :-
	!,
	v_zip(Ts, VTs, Vs).
v_zip([_Tag|Ts], VTs, [0|Vs]) :-
	v_zip(Ts, VTs, Vs).


median_test :-
	D <- [1,2],
	x <- median(D),
	print(x).

c_median(M) :-
	%M = 12.64911,
	median_cache(M),
	!.
c_median(M) :-
	findall(V, c_vector(_, V), Vectors),
	pair_wise_distance(Vectors, Ds),
	d <- Ds,
	<- print(d),
	M <- median(d),
	assert(median_cache(M)).

pair_wise_distance([], []).
pair_wise_distance([V|Vs], Ds) :-
	maplist(vector_distance(V), Vs, D0),
	pair_wise_distance(Vs, D1),
	append(D0, D1, Ds).

vector_distance(V1,V2,D) :-
	v1 <- V1,
	v2 <- V2,
	D <- sqrt(sum(abs(v1-v2)^2)).


		 /*******************************
		 *	       ptr		*
		 *******************************/

%%	ptr_rank(+Video, -RankedTags)
%
%	Ranked tags is a list of score-tag pairs.

ptr_rank(Video, RankedTags) :-
	video_tags(Video, Tags),
	length(Tags, TagCount),
	debug(ptr, 'video: ~w, tags: ~w', [Video,TagCount]),
	ptr_probs(Tags, Video, Probs),
	pairs_keys_values(Pairs, Probs, Tags),
	keysort(Pairs, Pairs1),
	reverse(Pairs1, RankedTags).

ptr_probs([], _, []).
ptr_probs([Tag|Tags], Video, [Prob|Probs]) :-
	ptr(Video, Tag, Prob),
	debug(ptr, '~w ~f2', [Tag,Prob]),
	ptr_probs(Tags, Video, Probs).

ptr(Video, Tag, Probability) :-
	findall(V, (tag_of_c_video(literal(Tag), V),V\==Video), Neighbour_Videos0),
	sort(Neighbour_Videos0, Neighbour_Videos),
	length(Neighbour_Videos, V_Count0),
	debug(ptr, 'videos: ~w', [V_Count0]),
	V_Count is V_Count0+1,
	c_vector(Video, V_Vector),
	maplist(c_vector, Neighbour_Videos, N_Vectors),
	kde(N_Vectors, V_Vector, V_Count, Probability).

kde(Neighbours, X, Video_Count, Prob) :-
	maplist(k(X), Neighbours, Ks),
	sumlist(Ks, K),
	Prob is (1/Video_Count) * K.

k(X, N, K) :-
	% we could declare "x" and "m" once, as in R they are global vars in R, but I do not like global vars in prolog :(
	c_median(Median),
	m <- Median,
	x <- X,
	n <- N,
	K <- exp(-(sqrt(sum((x-n)^2))^2)/m^2).


		 /*******************************
		 *  video tag utilities		*
		 *******************************/

%%	collection_tags(+Videos, -Tags)
%
%	Tags are all matching tags in a collecition of Videos.

collection_tags(Videos, Tags) :-
	findall(Tag, (member(Video,Videos),
		      matched_video_tag(Video, literal(Tag))
		     ),
		Tags0),
	sort(Tags0, Tags1),
	remove_stop_words(Tags1, dutch, Tags).

%%	video_tags(+Video, -Tags)
%
%	Tags are all matching tags in Video with stop words removed.

video_tags(Video, Tags) :-
	findall(T, matched_video_tag(Video, literal(T)), Tags0),
	sort(Tags0, Tags1),
	remove_stop_words(Tags1, dutch, Tags).

%%	matched_video_tag(?Video, ?Tag)
%
%	Tag is an annotation of Video.

matched_video_tag(Video, Tag) :-
	rdf(Video, pprime:hasAnnotation, AnnotationId),
	rdf(AnnotationId, rdf:value, Tag),
	rdf(AnnotationId, pprime:score, literal(ScoreA)),
	atom_number(ScoreA, Score),
	Score > 5.


tag_of_c_video(Tag, Video) :-
	rdf(AnnotationId, rdf:value, Tag),
	rdf(Video, pprime:hasAnnotation, AnnotationId),
	c_vector(Video, _),
	rdf(AnnotationId, pprime:score, literal(ScoreA)),
	atom_number(ScoreA, Score),
	Score > 5.


mbh_ptr_topN_eval_table :-
	(   m_video(V),
	    rdf(V, dc:id, literal(Id)),
	    video_mbh_terms(V, Terms),
	    length(Terms, TermCount),

	    (	TermCount is 0
	    ->	format('~w', [Id])
	    ;	TermCount2 is TermCount*2,
		ptr_rank(V, Tags),
		pairs_values(Tags, Tags1),
		topN(Tags1, TermCount, Terms, TopN),
		topN(Tags1, TermCount2, Terms, Top2N),

		format('~w,~2f,~2f~n', [Id, TopN, Top2N])
	    ),
	    fail
	;   true
	).