:- module(tagrank, [sem_tag_rank/2 ]). :- use_module(library(semweb/rdf_db)). :- use_module(library(semweb/rdfs)). :- use_module(library(real)). :- use_module(library(tfidf)). :- use_module(api(reconcile)). sem_tag_rank(Video, Ranked) :- tag_rank(Video, RankedTags), length(RankedTags, Tag_Count), debug(semrank, 'tags: ~w', [Tag_Count]), tag_interpretations(RankedTags, Interpretations), pairs_values(RankedTags, Tags), list_to_assoc(Interpretations, Interpretations_Assoc), tag_distance_graph(Tags, Interpretations_Assoc, Graph), length(Graph, Edge_Count), debug(semrank, 'distance graph size: ~w', [Edge_Count]), weighted_graph_rename(Graph, Mapping, Vector, Weights), length(Vector, V_Count), debug(semrank, 'vector: ~w', [V_Count]), page_rank(Vector, Weights, Scores0), Scores0 = [_EigenVector|Scores], assoc_to_list(Mapping, Pairs0), transpose_pairs(Pairs0, Pairs1), keysort(Pairs1, Pairs), pairs_values(Pairs, Resources), pairs_keys_values(Ranked0, Scores, Resources), keysort(Ranked0, Ranked1), reverse(Ranked1, Ranked2), merge_scores(RankedTags, Ranked2, Ranked). merge_scores(R1, R2, R) :- transpose_pairs(R1,R1_T), transpose_pairs(R2,R2_T), merge_tag_scores(R1_T,R2_T, Merged0), transpose_pairs(Merged0, Merged), keysort(Merged, Merged1), reverse(Merged1, R). merge_tag_scores([], T, T). merge_tag_scores([R-S1|T1], [R-S2|T2], [R-S|T]) :- !, S is S1+S2, merge_tag_scores(T1, T2, T). merge_tag_scores([R1-S1|T1], T2, [R1-S1|T]) :- merge_tag_scores(T1, T2, T). tag_interpretations([], []). %tag_interpretations([W-_Tag|Tags], Pairs) :- % W < 0.005, % !, % tag_interpretations(Tags, Pairs). tag_interpretations([_W-Tag|Tags], [Tag-Concepts|Pairs]) :- rdf_equal(skos:'Concept', Type), findall(C, (reconcile(Tag, 10, Type, [], Hits), member(hit(D,C,_,_), Hits), D < 6 ), Concepts), tag_interpretations(Tags, Pairs). page_rank(Vector, Weights, Rank) :- <- library(igraph), v <- Vector, w <- Weights, g <- graph(v), Rank <- 'page.rank(g, directed=TRUE, weights = w)$vector'. tag_distance_graph(Tags, Interpretations, Graph) :- cartesian(Tags, Tags, Cartesian), pair_distances(Cartesian, Interpretations, Graph). pair_distances([], _, []). pair_distances([[A,A]|T], I, Graph) :- !, pair_distances(T, I, Graph). pair_distances([[A,B]|T], I, [Rel|Graph]) :- Rel = i(A,B,Distance), findall(D, ( get_assoc(A, I, A_Concepts), get_assoc(B, I, B_Concepts), member(A_C, A_Concepts), member(B_C, B_Concepts), semantic_distance(A_C,B_C,D) ), Ds0), sort(Ds0, Ds), reverse(Ds, [Distance|_]), !, pair_distances(T, I, Graph). pair_distances([_|T], I, Graph) :- pair_distances(T, I, Graph). semantic_distance(A, B, D) :- rdf_reachable(B, skos:broader, A, 5, N), !, D is 0.5^N. semantic_distance(A, B, D) :- rdf_reachable(A, skos:broader, B, 2, N), !, D is 0.01^N. semantic_distance(A, B, D) :- rdf_reachable(A, skos:broader, C, 2, N1), A \== C, rdf_reachable(B, skos:broader, C, 2, N2), B \== C, !, N is max(N1, N2), D is 0.01^N. semantic_distance(A, B, D) :- rdf_reachable(A, skos:related, B, 2, N), !, D is 0.01^N. semantic_distance(A, B, D) :- rdf(A, P, B), \+ rdfs_subproperty_of(P, skos:broader), \+ rdfs_subproperty_of(P, skos:narrower), \+ rdfs_subproperty_of(P, skos:related), !, D is 0.01. /* semantic_distance(A, B, D) :- rdf(A, _, C), ( B == C -> D = 0.5 ; rdf_reachable(B, skos:broader, C, 3, _) -> D = 0.25 ; rdf_reachable(C, skos:broader, B, 3, _) -> D = 0.5 ). */ %% semantic_distance(+L1, +L2, -Cartesian) % % Cartesian is the cartesian product of L1 and L2. cartesian([], _L, []). cartesian([A|N], L, M) :- pair(A,L,M1), cartesian(N, L, M2), append(M1, M2, M). pair(_A, [], []) :- !. pair(A, [B|L], [[A,B]|N] ) :- pair(A, L, N). %% weighted_graph_rename(+Graph, -Assoc, -NewGraph, -Weights) % % Rename nodes so that they start counted by 1. weighted_graph_rename(Graph, Assoc, NewGraph, Weights) :- empty_assoc(Assoc0), rename_vertices(Graph, Assoc0, 1, NewGraph, Assoc, Weights). rename_vertices([], Assoc, _, [], Assoc, []). rename_vertices([i(C1,C2,W)|T], Assoc0, N, [NewC1,NewC2|Rest], Assoc, [W|Weights]) :- rename_vertex(C1, Assoc0, N, Assoc1, N1, NewC1), rename_vertex(C2, Assoc1, N1, Assoc2, N2, NewC2), rename_vertices(T, Assoc2, N2, Rest, Assoc, Weights). rename_vertex(C, Assoc, N, Assoc, N, New) :- get_assoc(C, Assoc, New), !. rename_vertex(C, Assoc, N, Assoc1, N1, N) :- N1 is N+1, put_assoc(C, Assoc, N, Assoc1). mbh_semrank_eval_table(V) :- ( m_video(V), rdf(V, dc:id, literal(Id)), video_mbh_terms(V, Terms), sem_tag_rank(V, Tags), length(Terms, TermCount), length(Tags, TagCount), ( TermCount is 0 -> format('~w', [Id]) ; TermCount2 is TermCount*2, pairs_values(Tags, Tags1), topN(Tags1, TermCount, Terms, TopN), topN(Tags1, TermCount2, Terms, Top2N), topN(Tags1, TagCount, Terms, All), format('~w,\t~w,\t~w,\t~2f,\t~2f,\t~2f~n', [Id, TagCount, TermCount, TopN, Top2N, All]) ), fail ; true ). topN(Tags, N, Terms, TopN) :- length(Terms, Term_Count), length(Tags, Tag_Count), ( Tag_Count > N -> length(Top_N_Tags, N), append(Top_N_Tags, _, Tags) ; Top_N_Tags = Tags ), maplist(downcase_atom, Top_N_Tags, Top_N_Tags1), sort(Top_N_Tags1, Top_N_Tags2), ord_intersect(Top_N_Tags2, Terms, Intersect), length(Intersect, Intersect_Count), TopN is Intersect_Count/Term_Count.