:- module(ptr, [create_collection/0, c_median/1, ptr_rank/2 ]). :- use_module(library(stop_words)). :- use_module(library(real)). :- use_module(library(tag_concept), [baseline_video/1,m_video/1,topN/4,video_mbh_terms/2]). %:- use_module(video_vectors). /* Probabilistic tag relevance http://www2009.eprints.org/36/1/p351.pdf */ :- dynamic c_vector/2, median_cache/1. :- set_prolog_stack(global, limit(1240000000)). /******************************* * process collection * *******************************/ %% create_collection % % Create the collection on which we are going the run the PTR. create_collection :- debug(ptr_collection), retractall(c_vector(_,_)), retractall(median_cache(_)), % collect videos and tags in the collection findall(V, m_video(V), Videos), collection_tags(Videos, Tags), % output some stats length(Videos, Video_Count), length(Tags, Tag_Count), debug(ptr_collection, '~w videos and ~w tags', [Video_Count, Tag_Count]), % create a vector for each video debug(ptr_collection, 'creating video vectors', []), video_vectors(Videos, Tags, VideoVectors), write_collection(VideoVectors). write_collection(VideoVectors) :- File = 'video_vectors.pl', debug(ptr_collection, 'write vectors to ~w', [File]), open(File, write, Stream), ( member(Video-Vector, VideoVectors), assert(c_vector(Video, Vector)), portray_clause(Stream, c_vector(Video, Vector)), fail ; true ), close(Stream). video_vectors([], _, []). video_vectors([V|Vs], Tags, [V-Vector|Rest]) :- findall(Tag, matched_video_tag(V, literal(Tag)), VTags0), sort(VTags0, VTags1), remove_stop_words(VTags1, dutch, VTags), v_zip(Tags, VTags, Vector), video_vectors(Vs, Tags, Rest). v_zip([], [], []). v_zip([Tag|Ts], [Tag|VTs], [1|Vs]) :- !, v_zip(Ts, VTs, Vs). v_zip([_Tag|Ts], VTs, [0|Vs]) :- v_zip(Ts, VTs, Vs). median_test :- D <- [1,2], x <- median(D), print(x). c_median(M) :- %M = 12.64911, median_cache(M), !. c_median(M) :- findall(V, c_vector(_, V), Vectors), pair_wise_distance(Vectors, Ds), d <- Ds, <- print(d), M <- median(d), assert(median_cache(M)). pair_wise_distance([], []). pair_wise_distance([V|Vs], Ds) :- maplist(vector_distance(V), Vs, D0), pair_wise_distance(Vs, D1), append(D0, D1, Ds). vector_distance(V1,V2,D) :- v1 <- V1, v2 <- V2, D <- sqrt(sum(abs(v1-v2)^2)). /******************************* * ptr * *******************************/ %% ptr_rank(+Video, -RankedTags) % % Ranked tags is a list of score-tag pairs. ptr_rank(Video, RankedTags) :- video_tags(Video, Tags), length(Tags, TagCount), debug(ptr, 'video: ~w, tags: ~w', [Video,TagCount]), ptr_probs(Tags, Video, Probs), pairs_keys_values(Pairs, Probs, Tags), keysort(Pairs, Pairs1), reverse(Pairs1, RankedTags). ptr_probs([], _, []). ptr_probs([Tag|Tags], Video, [Prob|Probs]) :- ptr(Video, Tag, Prob), debug(ptr, '~w ~f2', [Tag,Prob]), ptr_probs(Tags, Video, Probs). ptr(Video, Tag, Probability) :- findall(V, (tag_of_c_video(literal(Tag), V),V\==Video), Neighbour_Videos0), sort(Neighbour_Videos0, Neighbour_Videos), length(Neighbour_Videos, V_Count0), debug(ptr, 'videos: ~w', [V_Count0]), V_Count is V_Count0+1, c_vector(Video, V_Vector), maplist(c_vector, Neighbour_Videos, N_Vectors), kde(N_Vectors, V_Vector, V_Count, Probability). kde(Neighbours, X, Video_Count, Prob) :- maplist(k(X), Neighbours, Ks), sumlist(Ks, K), Prob is (1/Video_Count) * K. k(X, N, K) :- % we could declare "x" and "m" once, as in R they are global vars in R, but I do not like global vars in prolog :( c_median(Median), m <- Median, x <- X, n <- N, K <- exp(-(sqrt(sum((x-n)^2))^2)/m^2). /******************************* * video tag utilities * *******************************/ %% collection_tags(+Videos, -Tags) % % Tags are all matching tags in a collecition of Videos. collection_tags(Videos, Tags) :- findall(Tag, (member(Video,Videos), matched_video_tag(Video, literal(Tag)) ), Tags0), sort(Tags0, Tags1), remove_stop_words(Tags1, dutch, Tags). %% video_tags(+Video, -Tags) % % Tags are all matching tags in Video with stop words removed. video_tags(Video, Tags) :- findall(T, matched_video_tag(Video, literal(T)), Tags0), sort(Tags0, Tags1), remove_stop_words(Tags1, dutch, Tags). %% matched_video_tag(?Video, ?Tag) % % Tag is an annotation of Video. matched_video_tag(Video, Tag) :- rdf(Video, pprime:hasAnnotation, AnnotationId), rdf(AnnotationId, rdf:value, Tag), rdf(AnnotationId, pprime:score, literal(ScoreA)), atom_number(ScoreA, Score), Score > 5. tag_of_c_video(Tag, Video) :- rdf(AnnotationId, rdf:value, Tag), rdf(Video, pprime:hasAnnotation, AnnotationId), c_vector(Video, _), rdf(AnnotationId, pprime:score, literal(ScoreA)), atom_number(ScoreA, Score), Score > 5. mbh_ptr_topN_eval_table :- ( m_video(V), rdf(V, dc:id, literal(Id)), video_mbh_terms(V, Terms), length(Terms, TermCount), ( TermCount is 0 -> format('~w', [Id]) ; TermCount2 is TermCount*2, ptr_rank(V, Tags), pairs_values(Tags, Tags1), topN(Tags1, TermCount, Terms, TopN), topN(Tags1, TermCount2, Terms, Top2N), format('~w,~2f,~2f~n', [Id, TopN, Top2N]) ), fail ; true ).