:- module(semrank,
          [ r_semantic_distance_rank/2,
            c_semantic_distance_rank/2,
            semantic_distance_rank/2
          ]).

:- use_module(library(semweb/rdf_db)).
:- use_module(library(semweb/rdfs)).
:- use_module(library(real)).
:- use_module(library(tfidf)).
:- use_module(api(reconcile)).

% rank_cache(Video, Ranked): memoised result of semantic_distance_rank/2.
:- dynamic rank_cache/2.

%%  c_semantic_distance_rank(+Video, -Ranked)
%
%   Cached variant of semantic_distance_rank/2. If a ranking for Video
%   is already stored in rank_cache/2 it is returned; otherwise the
%   ranking is computed and stored.
%
%   The cache lookup commits to the first hit. Without that commit,
%   backtracking into this predicate after a cache hit would fall
%   through to the computing clause, redo the whole ranking and assert
%   a duplicate rank_cache/2 fact.

c_semantic_distance_rank(Video, Ranked) :-
    rank_cache(Video, Cached),
    !,
    Ranked = Cached.
c_semantic_distance_rank(Video, Ranked) :-
    semantic_distance_rank(Video, Ranked),
    assert(rank_cache(Video, Ranked)).

%%  r_semantic_distance_rank(+Video, -Ranked)
%
%   Refreshing variant: drops every cached ranking for Video,
%   recomputes it with semantic_distance_rank/2 and stores the new
%   result in rank_cache/2.

r_semantic_distance_rank(Video, Ranked) :-
    retractall(rank_cache(Video,_)),
    semantic_distance_rank(Video, Ranked),
    assert(rank_cache(Video, Ranked)).

%%  semantic_distance_rank(+Video, -Ranked)
%
%   Ranked is a list of Score-Resource pairs in descending Score order.
%   The resources are the vertices of a weighted graph that consists of
%   tag-to-concept edges (tag_interpretations/3) and concept-to-concept
%   edges (semantic_distance_graph/2). Scores come from igraph's
%   page.rank, called through R.
%
%   Progress is reported on the debug topic `semrank`.

semantic_distance_rank(Video, Ranked) :-
    tag_rank(Video, Tags),
    tag_interpretations(Tags, I_Graph, Concepts),
    length(Concepts, Concept_Count),
    debug(semrank, 'concepts: ~w', [Concept_Count]),
    semantic_distance_graph(Concepts, C_Graph),
    append(I_Graph, C_Graph, Graph),
    length(Graph, Edge_Count),
    debug(semrank, 'distance graph size: ~w', [Edge_Count]),
    % Replace resources by integer vertex ids; Assoc maps Resource -> Id.
    weighted_graph_rename(Graph, Assoc, Vector, Weights),
    length(Vector, V_Count),
    debug(semrank, 'vector: ~w', [V_Count]),
    page_rank(Vector, Weights, Scores0),
    % The first element of the R result is dropped.
    % NOTE(review): presumably it belongs to an extra vertex igraph
    % creates in front of vertex 1 -- confirm against the igraph
    % version in use. pairs_keys_values/3 below needs Scores and
    % Resources to have the same length.
    Scores0 = [_EigenVector|Scores],
    % Turn Resource-Id into Id-Resource ordered by Id, so that the
    % resources line up with the order of the scores.
    assoc_to_list(Assoc, Pairs0),
    transpose_pairs(Pairs0, Pairs1),
    keysort(Pairs1, Pairs),
    pairs_values(Pairs, Resources),
    pairs_keys_values(Ranked0, Scores, Resources),
    % Ascending sort on score, then reverse for highest-first.
    keysort(Ranked0, Ranked1),
    reverse(Ranked1, Ranked).

%%  tag_interpretations(+Tags, -Edges, -Concepts)
%
%   Tags is a list of Weight-Tag pairs. Edges holds one i(Tag,C,Weight)
%   term for every hit(D,C,_,_) that reconcile/5 returns for Tag with
%   D < 1.5. Concepts is the sorted, duplicate-free list of all C.
%
%   NOTE(review): presumably D is a match distance and 10 the maximum
%   number of hits -- confirm against api(reconcile).

tag_interpretations(Tags, Edges, Concepts) :-
    rdf_equal(skos:'Concept', Type),
    findall(i(Tag,C,W),
            (   member(W-Tag,Tags),
                %W > 0.001,
                reconcile(Tag, 10, Type, [], Hits),
                member(hit(D,C,_,_), Hits),
                D < 1.5
            ),
            Edges),
    findall(C, member(i(_,C,_), Edges), Cs),
    sort(Cs, Concepts).

%%  page_rank(+Vector, +Weights, -Rank)
%
%   Builds an igraph graph in R from the flat vertex-id list Vector and
%   returns in Rank the `$vector` component of page.rank, using Weights
%   as edge weights. The R variables v, w and g are overwritten on
%   every call.

page_rank(Vector, Weights, Rank) :-
    <- library(igraph),
    v <- Vector,
    w <- Weights,
    g <- graph(v),
    Rank <- 'page.rank(g, weights = w)$vector'.

%%  semantic_distance_graph(+Concepts, -Graph)
%
%   Graph is the list of i(A,B,Distance) edges found by
%   pair_distances/2 over all ordered pairs of Concepts.

semantic_distance_graph(Concepts, Graph) :-
    cartesian(Concepts, Concepts, Cartesian),
    pair_distances(Cartesian, Graph).

% pair_distances/2: base case; the recursive clauses follow below.
pair_distances([], []).
% pair_distances/2, recursive clauses.
%
% Walks a list of [A,B] pairs and emits an i(A,B,Distance) edge for
% every pair for which semantic_distance/3 succeeds. Pairs whose two
% members unify, and pairs without a distance, are skipped.
pair_distances([[Same,Same]|Pairs], Edges) :-
    !,
    pair_distances(Pairs, Edges).
pair_distances([[From,To]|Pairs], [i(From,To,Dist)|Edges]) :-
    semantic_distance(From, To, Dist),
    !,
    pair_distances(Pairs, Edges).
pair_distances([_Skipped|Pairs], Edges) :-
    pair_distances(Pairs, Edges).

%%  semantic_distance(+A, +B, -D)
%
%   D is the weight of the first applicable relation between A and B,
%   tried in this order:
%
%     1. A and B share an rdfs:label: fail. No distance relation is
%        added between different senses of the same interpretation.
%     2. A reachable from B over skos:broader within 3 steps:
%        0.05^Steps.
%     3. B reachable from A over skos:broader within 2 steps:
%        0.001^Steps.
%     4. B reachable from A over skos:related within 2 steps:
%        0.01^Steps.
%     5. Any direct triple A-P-B where P is not a subproperty of
%        skos:broader, skos:narrower or skos:related: 0.01.
%
%   Fails when none of these applies.

semantic_distance(A, B, D) :-
    (   rdf_has(A, rdfs:label, Label),
        rdf_has(B, rdfs:label, Label)
    ->  fail
    ;   rdf_reachable(B, skos:broader, A, 3, Up)
    ->  D is 0.05^Up
    ;   rdf_reachable(A, skos:broader, B, 2, Down)
    ->  D is 0.001^Down
    ;   rdf_reachable(A, skos:related, B, 2, Hops)
    ->  D is 0.01^Hops
    ;   rdf(A, P, B),
        \+ rdfs_subproperty_of(P, skos:broader),
        \+ rdfs_subproperty_of(P, skos:narrower),
        \+ rdfs_subproperty_of(P, skos:related)
    ->  D is 0.01
    ).

% Disabled alternative kept for reference:
/*
semantic_distance(A, B, D) :-
    rdf(A, _, C),
    (   B == C
    ->  D = 0.5
    ;   rdf_reachable(B, skos:broader, C, 3, _)
    ->  D = 0.25
    ;   rdf_reachable(C, skos:broader, B, 3, _)
    ->  D = 0.5
    ).
*/

%%  cartesian(+L1, +L2, -Cartesian)
%
%   Cartesian is the cartesian product of L1 and L2 as a list of
%   two-element lists [X,Y], ordered by L1 first and L2 second.

cartesian([], _Ys, []).
cartesian([X|Xs], Ys, Product) :-
    pair(X, Ys, Row),
    append(Row, Rest, Product),
    cartesian(Xs, Ys, Rest).

% pair(+X, +Ys, -Row): Row pairs X with every element of Ys, in order.
pair(_X, [], []) :- !.
pair(X, [Y|Ys], [[X,Y]|Row]) :-
    pair(X, Ys, Row).

%%  weighted_graph_rename(+Graph, -Assoc, -NewGraph, -Weights)
%
%   Renames the nodes of Graph, a list of i(From,To,Weight) edges, to
%   integers counted from 1 in order of first appearance. Assoc maps
%   each original node to its integer, NewGraph is the flat list
%   [From1,To1,From2,To2,...] of renamed endpoints, and Weights lists
%   the edge weights in the same edge order.

weighted_graph_rename(Graph, Assoc, NewGraph, Weights) :-
    empty_assoc(Empty),
    rename_vertices(Graph, Empty, 1, NewGraph, Assoc, Weights).

% rename_vertices(+Edges, +Assoc0, +NextId, -Flat, -Assoc, -Weights)
% Threads the node table and the next free id through the edge list.
rename_vertices([], Table, _NextId, [], Table, []).
rename_vertices([i(From,To,Weight)|Edges], Table0, Id0,
                [FromId,ToId|Flat], Table, [Weight|Weights]) :-
    rename_vertex(From, Table0, Id0, Table1, Id1, FromId),
    rename_vertex(To, Table1, Id1, Table2, Id2, ToId),
    rename_vertices(Edges, Table2, Id2, Flat, Table, Weights).

% rename_vertex(+Node, +Assoc0, +NextId0, -Assoc, -NextId, -Id)
% A node already in the table keeps its id; table and counter are
% unchanged. The clause for unseen nodes follows below.
rename_vertex(Node, Table, NextId, Table, NextId, Known) :-
    get_assoc(Node, Table, Known),
    !.
% rename_vertex/6, clause for a node that is not yet in the table:
% the node gets the current counter value N as its id, is stored in
% the assoc, and the counter advances to N+1.
rename_vertex(C, Assoc, N, Assoc1, N1, N) :-
    N1 is N+1,
    put_assoc(C, Assoc, N, Assoc1).

%%  mbh_pagerank_eval_table(?V)
%
%   Prints one CSV row per solution of m_video(V) to current output
%   and always succeeds. A row is
%
%     Id,TopN,Top2N
%
%   where, with K the number of terms from video_mbh_terms/2, TopN and
%   Top2N are the topN/4 scores for the K and 2*K highest ranked
%   resources of semantic_distance_rank/2. A video without terms gets a
%   row holding only its Id.
%
%   Every row ends with a newline, including the Id-only row; without
%   that, an Id-only row would run into the row that follows it.

mbh_pagerank_eval_table(V) :-
    (   m_video(V),
        rdf(V, dc:id, literal(Id)),
        video_mbh_terms(V, Terms),
        semantic_distance_rank(V, Concepts),
        length(Terms, TermCount),
        (   TermCount =:= 0
        ->  format('~w~n', [Id])
        ;   TermCount2 is TermCount*2,
            % Concepts is a Score-Resource list; keep the resources.
            pairs_values(Concepts, Concepts1),
            topN(Concepts1, TermCount, Terms, TopN),
            topN(Concepts1, TermCount2, Terms, Top2N),
            format('~w,~2f,~2f~n', [Id,TopN, Top2N])
        ),
        fail                    % failure-driven loop over all videos
    ;   true
    ).

%%  topN(+Tags, +N, +Terms, -TopN)
%
%   TopN is the fraction of Terms that term_match/3 finds among the
%   first N elements of Tags (all of Tags when it has at most N
%   elements).
%
%   Terms must be non-empty: the final division by the number of terms
%   raises an arithmetic evaluation error for an empty list.

topN(Tags, N, Terms, TopN) :-
    length(Terms, Term_Count),
    length(Tags, Tag_Count),
    (   Tag_Count > N
    ->  length(Top_N_Tags, N),
        append(Top_N_Tags, _, Tags)
    ;   Top_N_Tags = Tags
    ),
    term_match(Terms, Top_N_Tags, Intersect),
    length(Intersect, Intersect_Count),
    TopN is Intersect_Count/Term_Count.

%%  term_match(+Terms, +Concepts, -Intersect)
%
%   Intersect is the sublist of Terms, in order, for which some member
%   of Concepts has an rdfs:label literal matching exact(Term).
%   Terms without such a concept are dropped.

term_match([], _, []).
term_match([Term|Ts], Concepts, [Term|Intersect]) :-
    member(C, Concepts),
    rdf_has(C, rdfs:label, literal(exact(Term), _)),
    !,
    term_match(Ts, Concepts, Intersect).
term_match([_Term|Ts], Concepts, Intersect) :-
    term_match(Ts, Concepts, Intersect).